xref: /llvm-project/llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9-SDAG %s
3; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9-GISEL %s
4; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-SDAG %s
5; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-GISEL %s
6target datalayout = "A5"
7
; Dynamic alloca whose element count is the i32 kernel argument %n, with the
; default alignment. The expected code computes the size with scalar
; instructions only: n*4 rounded up to a multiple of 16, shifted left by 6 on
; gfx900 and by 5 on gfx1100, then added to the incoming s32 value to form the
; new s32. The volatile store of 123 (0x7b) is addressed by the old s32 value.
; NOTE(review): s32 is presumably the stack pointer and the 6/5 shift the
; wave64/wave32 scratch scaling - confirm against the AMDGPU backend docs.
8define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform(i32 %n) {
9; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform:
10; GFX9-SDAG:       ; %bb.0:
11; GFX9-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x0
12; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
13; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
14; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x400
15; GFX9-SDAG-NEXT:    s_mov_b32 s5, s32
16; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
17; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 2
18; GFX9-SDAG-NEXT:    s_add_i32 s4, s4, 15
19; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, -16
20; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
21; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 6
22; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
23; GFX9-SDAG-NEXT:    s_add_i32 s32, s5, s4
24; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s5
25; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
26; GFX9-SDAG-NEXT:    s_endpgm
27;
28; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform:
29; GFX9-GISEL:       ; %bb.0:
30; GFX9-GISEL-NEXT:    s_load_dword s5, s[8:9], 0x0
31; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
32; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x400
33; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
34; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
35; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX9-GISEL-NEXT:    s_lshl2_add_u32 s5, s5, 15
37; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, -16
38; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
39; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
40; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s5, 6
41; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
42; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
43; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
44; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
45; GFX9-GISEL-NEXT:    s_endpgm
46;
47; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform:
48; GFX11-SDAG:       ; %bb.0:
49; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x0
50; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
51; GFX11-SDAG-NEXT:    s_mov_b32 s32, 16
52; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
53; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
54; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s1 dlc
55; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
56; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
57; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
58; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
59; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 15
60; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, -16
61; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
62; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 5
63; GFX11-SDAG-NEXT:    s_add_i32 s32, s1, s0
64; GFX11-SDAG-NEXT:    s_endpgm
65;
66; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform:
67; GFX11-GISEL:       ; %bb.0:
68; GFX11-GISEL-NEXT:    s_load_b32 s1, s[4:5], 0x0
69; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
70; GFX11-GISEL-NEXT:    s_mov_b32 s32, 16
71; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
72; GFX11-GISEL-NEXT:    s_mov_b32 s0, s32
73; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s0 dlc
74; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
75; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
76; GFX11-GISEL-NEXT:    s_lshl2_add_u32 s1, s1, 15
77; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
78; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, -16
79; GFX11-GISEL-NEXT:    s_lshl_b32 s1, s1, 5
80; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
81; GFX11-GISEL-NEXT:    s_add_u32 s32, s0, s1
82; GFX11-GISEL-NEXT:    s_endpgm
  ; Variable-length array of %n i32 elements in address space 5.
83  %alloca = alloca i32, i32 %n, addrspace(5)
  ; Volatile, so the store (and with it the allocation) shows up in the output.
84  store volatile i32 123, ptr addrspace(5) %alloca
85  ret void
86}
87
; Dynamic alloca sized by the kernel argument %n with an explicit align 128.
; Besides the scalar size computation (n*4 rounded up to 16, shifted by 6 on
; gfx900 / 5 on gfx1100), the expected code rounds s32 itself up before using
; it as the allocation address: add 0x1fff and mask with 0xffffe000 on gfx900,
; add 0xfff and mask with 0xfffff000 on gfx1100. The volatile store of 10 goes
; to that rounded address, and the new s32 is the rounded address plus the
; scaled size.
; NOTE(review): 0x2000 = 128 << 6 and 0x1000 = 128 << 5, i.e. the requested
; alignment presumably scaled like the size - confirm.
88define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_over_aligned(i32 %n) {
89; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
90; GFX9-SDAG:       ; %bb.0:
91; GFX9-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x0
92; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
93; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x2000
94; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
95; GFX9-SDAG-NEXT:    s_add_i32 s5, s32, 0x1fff
96; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
97; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 2
98; GFX9-SDAG-NEXT:    s_add_i32 s4, s4, 15
99; GFX9-SDAG-NEXT:    s_and_b32 s5, s5, 0xffffe000
100; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, -16
101; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 10
102; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 6
103; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s5
104; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
105; GFX9-SDAG-NEXT:    s_add_i32 s32, s5, s4
106; GFX9-SDAG-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
107; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
108; GFX9-SDAG-NEXT:    s_endpgm
109;
110; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
111; GFX9-GISEL:       ; %bb.0:
112; GFX9-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x0
113; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x2000
114; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
115; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
116; GFX9-GISEL-NEXT:    s_add_u32 s5, s32, 0x1fff
117; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
118; GFX9-GISEL-NEXT:    s_lshl2_add_u32 s4, s4, 15
119; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, 0xffffe000
120; GFX9-GISEL-NEXT:    s_and_b32 s4, s4, -16
121; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 10
122; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
123; GFX9-GISEL-NEXT:    s_lshl_b32 s4, s4, 6
124; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
125; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s4
126; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
127; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
128; GFX9-GISEL-NEXT:    s_endpgm
129;
130; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
131; GFX11-SDAG:       ; %bb.0:
132; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x0
133; GFX11-SDAG-NEXT:    s_movk_i32 s32, 0x80
134; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 10
135; GFX11-SDAG-NEXT:    s_add_i32 s1, s32, 0xfff
136; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
137; GFX11-SDAG-NEXT:    s_and_b32 s1, s1, 0xfffff000
138; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s1 dlc
139; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
140; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
141; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
142; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
143; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 15
144; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, -16
145; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
146; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 5
147; GFX11-SDAG-NEXT:    s_add_i32 s32, s1, s0
148; GFX11-SDAG-NEXT:    s_endpgm
149;
150; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_over_aligned:
151; GFX11-GISEL:       ; %bb.0:
152; GFX11-GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x0
153; GFX11-GISEL-NEXT:    s_movk_i32 s32, 0x80
154; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 10
155; GFX11-GISEL-NEXT:    s_add_u32 s1, s32, 0xfff
156; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
157; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, 0xfffff000
158; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
159; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
160; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
161; GFX11-GISEL-NEXT:    s_lshl2_add_u32 s0, s0, 15
162; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
163; GFX11-GISEL-NEXT:    s_and_b32 s0, s0, -16
164; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
165; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
166; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
167; GFX11-GISEL-NEXT:    s_endpgm
  ; %n i32 elements, with an alignment (128) larger than the default case.
168  %alloca = alloca i32, i32 %n, align 128, addrspace(5)
  ; Volatile, so the store (and with it the allocation) shows up in the output.
169  store volatile i32 10, ptr addrspace(5) %alloca
170  ret void
171}
172
; Dynamic alloca sized by the kernel argument %n with an explicit align 2.
; The expected code contains no rounding of s32: the old s32 value is used
; directly as the allocation address, and the size is still n*4 rounded up to
; a multiple of 16, shifted by 6 on gfx900 / 5 on gfx1100. Apart from the
; stored constant (22) the sequences match those of
; @test_dynamic_stackalloc_kernel_uniform.
173define amdgpu_kernel void @test_dynamic_stackalloc_kernel_uniform_under_aligned(i32 %n) {
174; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
175; GFX9-SDAG:       ; %bb.0:
176; GFX9-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x0
177; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
178; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
179; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x400
180; GFX9-SDAG-NEXT:    s_mov_b32 s5, s32
181; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
182; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 2
183; GFX9-SDAG-NEXT:    s_add_i32 s4, s4, 15
184; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, -16
185; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 22
186; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 6
187; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
188; GFX9-SDAG-NEXT:    s_add_i32 s32, s5, s4
189; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s5
190; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
191; GFX9-SDAG-NEXT:    s_endpgm
192;
193; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
194; GFX9-GISEL:       ; %bb.0:
195; GFX9-GISEL-NEXT:    s_load_dword s5, s[8:9], 0x0
196; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
197; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x400
198; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
199; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
200; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
201; GFX9-GISEL-NEXT:    s_lshl2_add_u32 s5, s5, 15
202; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, -16
203; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 22
204; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
205; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s5, 6
206; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
207; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
208; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
209; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
210; GFX9-GISEL-NEXT:    s_endpgm
211;
212; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
213; GFX11-SDAG:       ; %bb.0:
214; GFX11-SDAG-NEXT:    s_load_b32 s0, s[4:5], 0x0
215; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 22
216; GFX11-SDAG-NEXT:    s_mov_b32 s32, 16
217; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
218; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
219; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s1 dlc
220; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
221; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
222; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
223; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
224; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 15
225; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, -16
226; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
227; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 5
228; GFX11-SDAG-NEXT:    s_add_i32 s32, s1, s0
229; GFX11-SDAG-NEXT:    s_endpgm
230;
231; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_uniform_under_aligned:
232; GFX11-GISEL:       ; %bb.0:
233; GFX11-GISEL-NEXT:    s_load_b32 s1, s[4:5], 0x0
234; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 22
235; GFX11-GISEL-NEXT:    s_mov_b32 s32, 16
236; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
237; GFX11-GISEL-NEXT:    s_mov_b32 s0, s32
238; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s0 dlc
239; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
240; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
241; GFX11-GISEL-NEXT:    s_lshl2_add_u32 s1, s1, 15
242; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
243; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, -16
244; GFX11-GISEL-NEXT:    s_lshl_b32 s1, s1, 5
245; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
246; GFX11-GISEL-NEXT:    s_add_u32 s32, s0, s1
247; GFX11-GISEL-NEXT:    s_endpgm
  ; %n i32 elements, with a requested alignment (2) below the element size.
248  %alloca = alloca i32, i32 %n, align 2, addrspace(5)
  ; Volatile, so the store (and with it the allocation) shows up in the output.
249  store volatile i32 22, ptr addrspace(5) %alloca
250  ret void
251}
252
; Dynamic alloca whose element count is the work-item id
; (@llvm.amdgcn.workitem.id.x), with the default alignment.
; The expected code first forms the per-lane size in v0 (idx*4 + 15, masked to
; a multiple of 16), then runs a loop (.LBB3_1) over the set bits of exec:
; find the lowest set bit (s_ff1_i32_b64 on gfx900, s_ctz_i32_b32 on gfx1100),
; read that lane's size with v_readlane_b32, clear the bit, and keep the
; running s_max_u32. The maximum is shifted left by 6 (gfx900) or 5 (gfx1100)
; and added to the old s32 to form the new s32; the volatile store of 123
; (0x7b) is addressed by the old s32 value.
; NOTE(review): s32 is presumably the stack pointer, and the max-reduction
; presumably sizes the allocation for the largest request in the wave -
; confirm against the AMDGPU backend docs.
253define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent() {
254; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent:
255; GFX9-SDAG:       ; %bb.0:
256; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
257; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
258; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
259; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
260; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
261; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
262; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
263; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x400
264; GFX9-SDAG-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
265; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
266; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
267; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
268; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
269; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
270; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB3_1
271; GFX9-SDAG-NEXT:  ; %bb.2:
272; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
273; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
274; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
275; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
276; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
277; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
278; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
279; GFX9-SDAG-NEXT:    s_endpgm
280;
281; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent:
282; GFX9-GISEL:       ; %bb.0:
283; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
284; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
285; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
286; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
287; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
288; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
289; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
290; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x400
291; GFX9-GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
292; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
293; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
294; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
295; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
296; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
297; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
298; GFX9-GISEL-NEXT:  ; %bb.2:
299; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
300; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
301; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
302; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
303; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
304; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
305; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
306; GFX9-GISEL-NEXT:    s_endpgm
307;
308; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent:
309; GFX11-SDAG:       ; %bb.0:
310; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
311; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
312; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
313; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
314; GFX11-SDAG-NEXT:    s_mov_b32 s32, 16
315; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
316; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
317; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
318; GFX11-SDAG-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
319; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
320; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
321; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
322; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
323; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
324; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
325; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
326; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB3_1
327; GFX11-SDAG-NEXT:  ; %bb.2:
328; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
329; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7b
330; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
331; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
332; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
333; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
334; GFX11-SDAG-NEXT:    s_endpgm
335;
336; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent:
337; GFX11-GISEL:       ; %bb.0:
338; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
339; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
340; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
341; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
342; GFX11-GISEL-NEXT:    s_mov_b32 s32, 16
343; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
344; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
345; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
346; GFX11-GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
347; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
348; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
349; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
350; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
351; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
352; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
353; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
354; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
355; GFX11-GISEL-NEXT:  ; %bb.2:
356; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
357; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
358; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
359; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
360; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
361; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
362; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
363; GFX11-GISEL-NEXT:    s_endpgm
  ; The element count is the work-item id, so it differs from lane to lane.
364  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  ; Note the allocated element type is float while the store below writes an i32.
365  %alloca = alloca float, i32 %idx, addrspace(5)
  ; Volatile, so the store (and with it the allocation) shows up in the output.
366  store volatile i32 123, ptr addrspace(5) %alloca
367  ret void
368}
369
; Dynamic alloca whose element count is the work-item id, with align 128.
; The expected code forms the per-lane size (idx*4 + 15, masked to a multiple
; of 16) and reduces it to a maximum with a loop (.LBB4_1) over the set bits of
; exec (v_readlane_b32 + s_max_u32). In addition, s32 is rounded up before
; being used as the allocation address: add 0x1fff / mask 0xffffe000 on gfx900,
; add 0xfff / mask 0xfffff000 on gfx1100. The new s32 is that rounded address
; plus the maximum shifted left by 6 (gfx900) or 5 (gfx1100); the volatile
; store of 444 (0x1bc) goes to the rounded address.
; NOTE(review): s32 is presumably the stack pointer and the masks the
; wave-scaled form of the 128-byte alignment - confirm.
370define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent_over_aligned() {
371; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
372; GFX9-SDAG:       ; %bb.0:
373; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
374; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
375; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
376; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
377; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
378; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
379; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
380; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x2000
381; GFX9-SDAG-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
382; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
383; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
384; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
385; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
386; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
387; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB4_1
388; GFX9-SDAG-NEXT:  ; %bb.2:
389; GFX9-SDAG-NEXT:    s_add_i32 s4, s32, 0x1fff
390; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, 0xffffe000
391; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
392; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, s6, 6, v0
393; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
394; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1bc
395; GFX9-SDAG-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
396; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
397; GFX9-SDAG-NEXT:    s_endpgm
398;
399; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
400; GFX9-GISEL:       ; %bb.0:
401; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
402; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
403; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
404; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
405; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
406; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
407; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
408; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x2000
409; GFX9-GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
410; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
411; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
412; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
413; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
414; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
415; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
416; GFX9-GISEL-NEXT:  ; %bb.2:
417; GFX9-GISEL-NEXT:    s_add_u32 s5, s32, 0x1fff
418; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, 0xffffe000
419; GFX9-GISEL-NEXT:    s_lshl_b32 s4, s6, 6
420; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x1bc
421; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
422; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s4
423; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
424; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
425; GFX9-GISEL-NEXT:    s_endpgm
426;
427; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
428; GFX11-SDAG:       ; %bb.0:
429; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
430; GFX11-SDAG-NEXT:    s_movk_i32 s32, 0x80
431; GFX11-SDAG-NEXT:    s_mov_b32 s2, exec_lo
432; GFX11-SDAG-NEXT:    s_add_i32 s0, s32, 0xfff
433; GFX11-SDAG-NEXT:    s_mov_b32 s1, 0
434; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
435; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, 0xfffff000
436; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
437; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
438; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
439; GFX11-SDAG-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
440; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s3, s2
441; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
442; GFX11-SDAG-NEXT:    v_readlane_b32 s4, v0, s3
443; GFX11-SDAG-NEXT:    s_bitset0_b32 s2, s3
444; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
445; GFX11-SDAG-NEXT:    s_max_u32 s1, s1, s4
446; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
447; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB4_1
448; GFX11-SDAG-NEXT:  ; %bb.2:
449; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s1, 5, s0
450; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1bc
451; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
452; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
453; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s0 dlc
454; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
455; GFX11-SDAG-NEXT:    s_endpgm
456;
457; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_over_aligned:
458; GFX11-GISEL:       ; %bb.0:
459; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
460; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
461; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
462; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
463; GFX11-GISEL-NEXT:    s_movk_i32 s32, 0x80
464; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
465; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
466; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
467; GFX11-GISEL-NEXT:  .LBB4_1: ; =>This Inner Loop Header: Depth=1
468; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
469; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
470; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
471; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
472; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
473; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
474; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
475; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB4_1
476; GFX11-GISEL-NEXT:  ; %bb.2:
477; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x1bc
478; GFX11-GISEL-NEXT:    s_add_u32 s1, s32, 0xfff
479; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
480; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, 0xfffff000
481; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
482; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
483; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
484; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
485; GFX11-GISEL-NEXT:    s_endpgm
  ; The element count is the work-item id, so it differs from lane to lane.
486  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  ; %idx i32 elements, with an alignment (128) larger than the default case.
487  %alloca = alloca i32, i32 %idx, align 128, addrspace(5)
  ; Volatile, so the store (and with it the allocation) shows up in the output.
488  store volatile i32 444, ptr addrspace(5) %alloca
489  ret void
490}
491
; Dynamic alloca of i128 elements whose count is the work-item id, with an
; explicit align 2.
; The per-lane size is idx*16: the SelectionDAG runs expect a plain
; v_lshlrev_b32 by 4, while the GlobalISel runs expect v_lshl_add_u32 (.., 4,
; 15) followed by a -16 mask. Both then reduce it to a maximum with a loop
; (.LBB5_1) over the set bits of exec (v_readlane_b32 + s_max_u32), shift the
; maximum left by 6 (gfx900) or 5 (gfx1100), and add it to the old s32. No
; rounding of s32 is expected; the volatile store of 666 (0x29a) is addressed
; by the old s32 value.
; NOTE(review): s32 is presumably the stack pointer - confirm.
492define amdgpu_kernel void @test_dynamic_stackalloc_kernel_divergent_under_aligned() {
493; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
494; GFX9-SDAG:       ; %bb.0:
495; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
496; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
497; GFX9-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
498; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
499; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
500; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
501; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x400
502; GFX9-SDAG-NEXT:  .LBB5_1: ; =>This Inner Loop Header: Depth=1
503; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
504; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
505; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
506; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
507; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
508; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB5_1
509; GFX9-SDAG-NEXT:  ; %bb.2:
510; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
511; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
512; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
513; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
514; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x29a
515; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
516; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
517; GFX9-SDAG-NEXT:    s_endpgm
518;
519; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
520; GFX9-GISEL:       ; %bb.0:
521; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
522; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 4, 15
523; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
524; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
525; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
526; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
527; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
528; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x400
529; GFX9-GISEL-NEXT:  .LBB5_1: ; =>This Inner Loop Header: Depth=1
530; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
531; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
532; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
533; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
534; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
535; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB5_1
536; GFX9-GISEL-NEXT:  ; %bb.2:
537; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
538; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
539; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
540; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
541; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
542; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
543; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
544; GFX9-GISEL-NEXT:    s_endpgm
545;
546; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
547; GFX11-SDAG:       ; %bb.0:
548; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
549; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
550; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
551; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
552; GFX11-SDAG-NEXT:    s_mov_b32 s32, 16
553; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
554; GFX11-SDAG-NEXT:  .LBB5_1: ; =>This Inner Loop Header: Depth=1
555; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
556; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
557; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
558; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
559; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
560; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
561; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
562; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB5_1
563; GFX11-SDAG-NEXT:  ; %bb.2:
564; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
565; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x29a
566; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
567; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
568; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
569; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
570; GFX11-SDAG-NEXT:    s_endpgm
571;
572; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_divergent_under_aligned:
573; GFX11-GISEL:       ; %bb.0:
574; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
575; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
576; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
577; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
578; GFX11-GISEL-NEXT:    s_mov_b32 s32, 16
579; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 4, 15
580; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
581; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
582; GFX11-GISEL-NEXT:  .LBB5_1: ; =>This Inner Loop Header: Depth=1
583; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
584; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
585; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
586; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
587; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
588; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
589; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
590; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB5_1
591; GFX11-GISEL-NEXT:  ; %bb.2:
592; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
593; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
594; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
595; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
596; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
597; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
598; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
599; GFX11-GISEL-NEXT:    s_endpgm
  ; The element count is the work-item id, so it differs from lane to lane.
600  %idx = call i32 @llvm.amdgcn.workitem.id.x()
  ; %idx i128 elements, with a requested alignment (2) below the element size.
601  %alloca = alloca i128, i32 %idx, align 2, addrspace(5)
  ; Volatile, so the store shows up in the output; only an i32 is written.
602  store volatile i32 666, ptr addrspace(5) %alloca
603  ret void
604}
605
606define amdgpu_kernel void @test_dynamic_stackalloc_kernel_multiple_allocas(i32 %n, i32 %m) {
607; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas:
608; GFX9-SDAG:       ; %bb.0: ; %entry
609; GFX9-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
610; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
611; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
612; GFX9-SDAG-NEXT:    s_mov_b32 s8, 0
613; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
614; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
615; GFX9-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
616; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x2000
617; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB6_4
618; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb.0
619; GFX9-SDAG-NEXT:    s_lshl_b32 s5, s5, 2
620; GFX9-SDAG-NEXT:    s_add_i32 s5, s5, 15
621; GFX9-SDAG-NEXT:    s_add_i32 s6, s32, 0xfff
622; GFX9-SDAG-NEXT:    s_and_b32 s5, s5, -16
623; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
624; GFX9-SDAG-NEXT:    s_and_b32 s9, s6, 0xfffff000
625; GFX9-SDAG-NEXT:    s_lshl_b32 s5, s5, 6
626; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
627; GFX9-SDAG-NEXT:    s_mov_b64 s[6:7], exec
628; GFX9-SDAG-NEXT:    s_add_i32 s32, s9, s5
629; GFX9-SDAG-NEXT:  .LBB6_2: ; =>This Inner Loop Header: Depth=1
630; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s5, s[6:7]
631; GFX9-SDAG-NEXT:    v_readlane_b32 s10, v0, s5
632; GFX9-SDAG-NEXT:    s_bitset0_b64 s[6:7], s5
633; GFX9-SDAG-NEXT:    s_max_u32 s8, s8, s10
634; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[6:7], 0
635; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB6_2
636; GFX9-SDAG-NEXT:  ; %bb.3:
637; GFX9-SDAG-NEXT:    s_mov_b32 s5, s32
638; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s5
639; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s8, 6, v0
640; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
641; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 3
642; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s9
643; GFX9-SDAG-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
644; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
645; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 4
646; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s5
647; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
648; GFX9-SDAG-NEXT:  .LBB6_4: ; %bb.1
649; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 2
650; GFX9-SDAG-NEXT:    s_add_i32 s4, s4, 15
651; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, -16
652; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 1
653; GFX9-SDAG-NEXT:    s_lshl_b32 s4, s4, 6
654; GFX9-SDAG-NEXT:    s_mov_b32 s5, s32
655; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s33
656; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
657; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 2
658; GFX9-SDAG-NEXT:    s_add_i32 s32, s5, s4
659; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s5
660; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
661; GFX9-SDAG-NEXT:    s_endpgm
662;
663; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas:
664; GFX9-GISEL:       ; %bb.0: ; %entry
665; GFX9-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
666; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
667; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
668; GFX9-GISEL-NEXT:    s_mov_b32 s8, 0
669; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
670; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
671; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s4, 0
672; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x2000
673; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB6_4
674; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb.0
675; GFX9-GISEL-NEXT:    s_lshl2_add_u32 s5, s5, 15
676; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, -16
677; GFX9-GISEL-NEXT:    s_lshl_b32 s6, s5, 6
678; GFX9-GISEL-NEXT:    s_add_u32 s5, s32, 0xfff
679; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, 0xfffff000
680; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
681; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s6
682; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
683; GFX9-GISEL-NEXT:    s_mov_b64 s[6:7], exec
684; GFX9-GISEL-NEXT:  .LBB6_2: ; =>This Inner Loop Header: Depth=1
685; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s9, s[6:7]
686; GFX9-GISEL-NEXT:    v_readlane_b32 s10, v0, s9
687; GFX9-GISEL-NEXT:    s_bitset0_b64 s[6:7], s9
688; GFX9-GISEL-NEXT:    s_max_u32 s8, s8, s10
689; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
690; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB6_2
691; GFX9-GISEL-NEXT:  ; %bb.3:
692; GFX9-GISEL-NEXT:    s_mov_b32 s6, s32
693; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 3
694; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
695; GFX9-GISEL-NEXT:    s_lshl_b32 s7, s8, 6
696; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
697; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
698; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 4
699; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s6
700; GFX9-GISEL-NEXT:    s_add_u32 s32, s6, s7
701; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
702; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
703; GFX9-GISEL-NEXT:  .LBB6_4: ; %bb.1
704; GFX9-GISEL-NEXT:    s_lshl2_add_u32 s4, s4, 15
705; GFX9-GISEL-NEXT:    s_mov_b32 s5, s32
706; GFX9-GISEL-NEXT:    s_and_b32 s4, s4, -16
707; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 1
708; GFX9-GISEL-NEXT:    s_lshl_b32 s4, s4, 6
709; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], s33
710; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
711; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 2
712; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
713; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s4
714; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
715; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
716; GFX9-GISEL-NEXT:    s_endpgm
717;
718; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas:
719; GFX11-SDAG:       ; %bb.0: ; %entry
720; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
721; GFX11-SDAG-NEXT:    s_mov_b32 s2, 0
722; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
723; GFX11-SDAG-NEXT:    s_movk_i32 s32, 0x80
724; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
725; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s0, 0
726; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB6_4
727; GFX11-SDAG-NEXT:  ; %bb.1: ; %bb.0
728; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
729; GFX11-SDAG-NEXT:    s_lshl_b32 s1, s1, 2
730; GFX11-SDAG-NEXT:    s_add_i32 s3, s32, 0x7ff
731; GFX11-SDAG-NEXT:    s_add_i32 s1, s1, 15
732; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
733; GFX11-SDAG-NEXT:    s_and_b32 s4, s1, -16
734; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
735; GFX11-SDAG-NEXT:    s_and_b32 s1, s3, 0xfffff800
736; GFX11-SDAG-NEXT:    s_lshl_b32 s3, s4, 5
737; GFX11-SDAG-NEXT:    s_add_i32 s32, s1, s3
738; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
739; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
740; GFX11-SDAG-NEXT:    s_mov_b32 s3, exec_lo
741; GFX11-SDAG-NEXT:  .LBB6_2: ; =>This Inner Loop Header: Depth=1
742; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
743; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s4, s3
744; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
745; GFX11-SDAG-NEXT:    v_readlane_b32 s5, v0, s4
746; GFX11-SDAG-NEXT:    s_bitset0_b32 s3, s4
747; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
748; GFX11-SDAG-NEXT:    s_max_u32 s2, s2, s5
749; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s3, 0
750; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB6_2
751; GFX11-SDAG-NEXT:  ; %bb.3:
752; GFX11-SDAG-NEXT:    s_mov_b32 s3, s32
753; GFX11-SDAG-NEXT:    v_dual_mov_b32 v1, 3 :: v_dual_mov_b32 v2, 4
754; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s2, 5, s3
755; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
756; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
757; GFX11-SDAG-NEXT:    scratch_store_b32 off, v2, s3 dlc
758; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
759; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
760; GFX11-SDAG-NEXT:  .LBB6_4: ; %bb.1
761; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 2
762; GFX11-SDAG-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
763; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 15
764; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
765; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
766; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, -16
767; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s33 dlc
768; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
769; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
770; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
771; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 5
772; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
773; GFX11-SDAG-NEXT:    s_add_i32 s32, s1, s0
774; GFX11-SDAG-NEXT:    s_endpgm
775;
776; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_multiple_allocas:
777; GFX11-GISEL:       ; %bb.0: ; %entry
778; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
779; GFX11-GISEL-NEXT:    s_mov_b32 s2, 0
780; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
781; GFX11-GISEL-NEXT:    s_movk_i32 s32, 0x80
782; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
783; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
784; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB6_4
785; GFX11-GISEL-NEXT:  ; %bb.1: ; %bb.0
786; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
787; GFX11-GISEL-NEXT:    s_lshl2_add_u32 s1, s1, 15
788; GFX11-GISEL-NEXT:    s_add_u32 s3, s32, 0x7ff
789; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, -16
790; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1)
791; GFX11-GISEL-NEXT:    s_lshl_b32 s4, s1, 5
792; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
793; GFX11-GISEL-NEXT:    s_and_b32 s1, s3, 0xfffff800
794; GFX11-GISEL-NEXT:    s_mov_b32 s3, exec_lo
795; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s4
796; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
797; GFX11-GISEL-NEXT:  .LBB6_2: ; =>This Inner Loop Header: Depth=1
798; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s4, s3
799; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
800; GFX11-GISEL-NEXT:    v_readlane_b32 s5, v0, s4
801; GFX11-GISEL-NEXT:    s_bitset0_b32 s3, s4
802; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
803; GFX11-GISEL-NEXT:    s_max_u32 s2, s2, s5
804; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
805; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB6_2
806; GFX11-GISEL-NEXT:  ; %bb.3:
807; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, 3 :: v_dual_mov_b32 v1, 4
808; GFX11-GISEL-NEXT:    s_mov_b32 s3, s32
809; GFX11-GISEL-NEXT:    s_lshl_b32 s2, s2, 5
810; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
811; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
812; GFX11-GISEL-NEXT:    scratch_store_b32 off, v1, s3 dlc
813; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
814; GFX11-GISEL-NEXT:    s_add_u32 s32, s3, s2
815; GFX11-GISEL-NEXT:  .LBB6_4: ; %bb.1
816; GFX11-GISEL-NEXT:    s_lshl2_add_u32 s0, s0, 15
817; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
818; GFX11-GISEL-NEXT:    s_and_b32 s0, s0, -16
819; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
820; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
821; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s33 dlc
822; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
823; GFX11-GISEL-NEXT:    scratch_store_b32 off, v1, s1 dlc
824; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
825; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
826; GFX11-GISEL-NEXT:    s_endpgm
827entry:
828  %cond = icmp eq i32 %n, 0
829  %alloca1 = alloca i32, i32 8, addrspace(5)
830  %alloca2 = alloca i17, i32 %n, addrspace(5)
831  br i1 %cond, label %bb.0, label %bb.1
832bb.0:
833  %idx = call i32 @llvm.amdgcn.workitem.id.x()
834  %alloca3 = alloca i32, i32 %m, align 64, addrspace(5)
835  %alloca4 = alloca i32, i32 %idx, align 4, addrspace(5)
836  store volatile i32 3, ptr addrspace(5) %alloca3
837  store volatile i32 4, ptr addrspace(5) %alloca4
838  br label %bb.1
839bb.1:
840  store volatile i32 1, ptr addrspace(5) %alloca1
841  store volatile i32 2, ptr addrspace(5) %alloca2
842  ret void
843}
844
; Kernel whose two dynamic allocas sit in mutually exclusive branches:
; bb.0 (taken when %n == 0) allocates %m x i32 with align 64, and bb.1
; allocates (workitem id x) x i32 with align 4. Each branch performs one
; volatile store to its own alloca before joining in bb.2.
845define amdgpu_kernel void @test_dynamic_stackalloc_kernel_control_flow(i32 %n, i32 %m) {
846; GFX9-SDAG-LABEL: test_dynamic_stackalloc_kernel_control_flow:
847; GFX9-SDAG:       ; %bb.0: ; %entry
848; GFX9-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
849; GFX9-SDAG-NEXT:    s_add_u32 s0, s0, s17
850; GFX9-SDAG-NEXT:    s_addc_u32 s1, s1, 0
851; GFX9-SDAG-NEXT:    s_mov_b32 s33, 0
852; GFX9-SDAG-NEXT:    s_movk_i32 s32, 0x1000
853; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
854; GFX9-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
855; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0
856; GFX9-SDAG-NEXT:    s_cbranch_scc0 .LBB7_6
857; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb.1
858; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
859; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
860; GFX9-SDAG-NEXT:    s_mov_b64 s[6:7], exec
861; GFX9-SDAG-NEXT:  .LBB7_2: ; =>This Inner Loop Header: Depth=1
862; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s8, s[6:7]
863; GFX9-SDAG-NEXT:    v_readlane_b32 s9, v0, s8
864; GFX9-SDAG-NEXT:    s_bitset0_b64 s[6:7], s8
865; GFX9-SDAG-NEXT:    s_max_u32 s4, s4, s9
866; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[6:7], 0
867; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB7_2
868; GFX9-SDAG-NEXT:  ; %bb.3:
869; GFX9-SDAG-NEXT:    s_mov_b32 s6, s32
870; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s6
871; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s4, 6, v0
872; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
873; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 1
874; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s6
875; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
876; GFX9-SDAG-NEXT:    s_cbranch_execnz .LBB7_5
877; GFX9-SDAG-NEXT:  .LBB7_4: ; %bb.0
878; GFX9-SDAG-NEXT:    s_lshl_b32 s5, s5, 2
879; GFX9-SDAG-NEXT:    s_add_i32 s4, s32, 0xfff
880; GFX9-SDAG-NEXT:    s_add_i32 s5, s5, 15
881; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, 0xfffff000
882; GFX9-SDAG-NEXT:    s_and_b32 s5, s5, -16
883; GFX9-SDAG-NEXT:    s_lshl_b32 s5, s5, 6
884; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 2
885; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s4
886; GFX9-SDAG-NEXT:    s_add_i32 s32, s4, s5
887; GFX9-SDAG-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
888; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
889; GFX9-SDAG-NEXT:  .LBB7_5: ; %bb.2
890; GFX9-SDAG-NEXT:    s_endpgm
891; GFX9-SDAG-NEXT:  .LBB7_6:
892; GFX9-SDAG-NEXT:    s_branch .LBB7_4
893;
894; GFX9-GISEL-LABEL: test_dynamic_stackalloc_kernel_control_flow:
895; GFX9-GISEL:       ; %bb.0: ; %entry
896; GFX9-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
897; GFX9-GISEL-NEXT:    s_add_u32 s0, s0, s17
898; GFX9-GISEL-NEXT:    s_addc_u32 s1, s1, 0
899; GFX9-GISEL-NEXT:    s_mov_b32 s8, 0
900; GFX9-GISEL-NEXT:    s_mov_b32 s33, 0
901; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
902; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s4, 0
903; GFX9-GISEL-NEXT:    s_mov_b32 s4, 1
904; GFX9-GISEL-NEXT:    s_movk_i32 s32, 0x1000
905; GFX9-GISEL-NEXT:    s_cbranch_scc0 .LBB7_4
906; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb.1
907; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
908; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
909; GFX9-GISEL-NEXT:    s_mov_b64 s[6:7], exec
910; GFX9-GISEL-NEXT:  .LBB7_2: ; =>This Inner Loop Header: Depth=1
911; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s4, s[6:7]
912; GFX9-GISEL-NEXT:    v_readlane_b32 s9, v0, s4
913; GFX9-GISEL-NEXT:    s_bitset0_b64 s[6:7], s4
914; GFX9-GISEL-NEXT:    s_max_u32 s8, s8, s9
915; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
916; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB7_2
917; GFX9-GISEL-NEXT:  ; %bb.3:
918; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
919; GFX9-GISEL-NEXT:    s_lshl_b32 s6, s8, 6
920; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 1
921; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
922; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s6
923; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
924; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
925; GFX9-GISEL-NEXT:    s_mov_b32 s4, 0
926; GFX9-GISEL-NEXT:  .LBB7_4: ; %Flow
927; GFX9-GISEL-NEXT:    s_xor_b32 s4, s4, 1
928; GFX9-GISEL-NEXT:    s_and_b32 s4, s4, 1
929; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s4, 0
930; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB7_6
931; GFX9-GISEL-NEXT:  ; %bb.5: ; %bb.0
932; GFX9-GISEL-NEXT:    s_lshl2_add_u32 s4, s5, 15
933; GFX9-GISEL-NEXT:    s_add_u32 s5, s32, 0xfff
934; GFX9-GISEL-NEXT:    s_and_b32 s4, s4, -16
935; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, 0xfffff000
936; GFX9-GISEL-NEXT:    s_lshl_b32 s4, s4, 6
937; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 2
938; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
939; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s4
940; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
941; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
942; GFX9-GISEL-NEXT:  .LBB7_6: ; %bb.2
943; GFX9-GISEL-NEXT:    s_endpgm
944;
945; GFX11-SDAG-LABEL: test_dynamic_stackalloc_kernel_control_flow:
946; GFX11-SDAG:       ; %bb.0: ; %entry
947; GFX11-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
948; GFX11-SDAG-NEXT:    s_mov_b32 s33, 0
949; GFX11-SDAG-NEXT:    s_mov_b32 s32, 64
950; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
951; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s0, 0
952; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
953; GFX11-SDAG-NEXT:    s_cbranch_scc0 .LBB7_6
954; GFX11-SDAG-NEXT:  ; %bb.1: ; %bb.1
955; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
956; GFX11-SDAG-NEXT:    s_mov_b32 s2, exec_lo
957; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
958; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
959; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
960; GFX11-SDAG-NEXT:  .LBB7_2: ; =>This Inner Loop Header: Depth=1
961; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s3, s2
962; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
963; GFX11-SDAG-NEXT:    v_readlane_b32 s4, v0, s3
964; GFX11-SDAG-NEXT:    s_bitset0_b32 s2, s3
965; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
966; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s4
967; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
968; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB7_2
969; GFX11-SDAG-NEXT:  ; %bb.3:
970; GFX11-SDAG-NEXT:    s_mov_b32 s2, s32
971; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
972; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s2
973; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s2 dlc
974; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
975; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
976; GFX11-SDAG-NEXT:    s_cbranch_execnz .LBB7_5
977; GFX11-SDAG-NEXT:  .LBB7_4: ; %bb.0
978; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s1, 2
979; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 2
980; GFX11-SDAG-NEXT:    s_add_i32 s0, s0, 15
981; GFX11-SDAG-NEXT:    s_add_i32 s1, s32, 0x7ff
982; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, -16
983; GFX11-SDAG-NEXT:    s_and_b32 s1, s1, 0xfffff800
984; GFX11-SDAG-NEXT:    s_lshl_b32 s0, s0, 5
985; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s1 dlc
986; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
987; GFX11-SDAG-NEXT:    s_add_i32 s32, s1, s0
988; GFX11-SDAG-NEXT:  .LBB7_5: ; %bb.2
989; GFX11-SDAG-NEXT:    s_endpgm
990; GFX11-SDAG-NEXT:  .LBB7_6:
991; GFX11-SDAG-NEXT:    s_branch .LBB7_4
992;
993; GFX11-GISEL-LABEL: test_dynamic_stackalloc_kernel_control_flow:
994; GFX11-GISEL:       ; %bb.0: ; %entry
995; GFX11-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
996; GFX11-GISEL-NEXT:    s_mov_b32 s2, 0
997; GFX11-GISEL-NEXT:    s_mov_b32 s33, 0
998; GFX11-GISEL-NEXT:    s_mov_b32 s32, 64
999; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
1000; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
1001; GFX11-GISEL-NEXT:    s_mov_b32 s0, 1
1002; GFX11-GISEL-NEXT:    s_cbranch_scc0 .LBB7_4
1003; GFX11-GISEL-NEXT:  ; %bb.1: ; %bb.1
1004; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
1005; GFX11-GISEL-NEXT:    s_mov_b32 s0, exec_lo
1006; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1007; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1008; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1009; GFX11-GISEL-NEXT:  .LBB7_2: ; =>This Inner Loop Header: Depth=1
1010; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s3, s0
1011; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1012; GFX11-GISEL-NEXT:    v_readlane_b32 s4, v0, s3
1013; GFX11-GISEL-NEXT:    s_bitset0_b32 s0, s3
1014; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1015; GFX11-GISEL-NEXT:    s_max_u32 s2, s2, s4
1016; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
1017; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB7_2
1018; GFX11-GISEL-NEXT:  ; %bb.3:
1019; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 1
1020; GFX11-GISEL-NEXT:    s_mov_b32 s3, s32
1021; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s2, 5
1022; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1023; GFX11-GISEL-NEXT:    s_add_u32 s32, s3, s0
1024; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1025; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s3 dlc
1026; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1027; GFX11-GISEL-NEXT:  .LBB7_4: ; %Flow
1028; GFX11-GISEL-NEXT:    s_xor_b32 s0, s0, 1
1029; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
1030; GFX11-GISEL-NEXT:    s_and_b32 s0, s0, 1
1031; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s0, 0
1032; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB7_6
1033; GFX11-GISEL-NEXT:  ; %bb.5: ; %bb.0
1034; GFX11-GISEL-NEXT:    s_lshl2_add_u32 s0, s1, 15
1035; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 2
1036; GFX11-GISEL-NEXT:    s_add_u32 s1, s32, 0x7ff
1037; GFX11-GISEL-NEXT:    s_and_b32 s0, s0, -16
1038; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, 0xfffff800
1039; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1040; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1041; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1042; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1043; GFX11-GISEL-NEXT:  .LBB7_6: ; %bb.2
1044; GFX11-GISEL-NEXT:    s_endpgm
1045entry:
1046  %cond = icmp eq i32 %n, 0
1047  br i1 %cond, label %bb.0, label %bb.1
1048bb.0:
  ; Element count is the kernel argument %m; the checks for this block round
  ; s32 up with an add/and mask pair before using it as the alloca address.
1049  %alloca2 = alloca i32, i32 %m, align 64, addrspace(5)
1050  store volatile i32 2, ptr addrspace(5) %alloca2
1051  br label %bb.2
1052bb.1:
  ; Element count is the workitem id; the checks for this block expect a
  ; v_readlane_b32 / s_max_u32 loop over the bits of exec before s32 is advanced.
1053  %idx = call i32 @llvm.amdgcn.workitem.id.x()
1054  %alloca1 = alloca i32, i32 %idx, align 4, addrspace(5)
1055  store volatile i32 1, ptr addrspace(5) %alloca1
1056  br label %bb.2
1057bb.2:
1058  ret void
1059}
1060
; Non-kernel function that allocates %n x i32 with the default alignment and
; performs one volatile store of 123 to it. All four check prefixes expect the
; byte size to be computed in v0, reduced with a v_readlane_b32 / s_max_u32
; loop over exec (exec_lo on GFX11), and s32/s33 to be restored before
; s_setpc_b64.
1061define void @test_dynamic_stackalloc_device_uniform(i32 %n) {
1062; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform:
1063; GFX9-SDAG:       ; %bb.0:
1064; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1065; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1066; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
1067; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1068; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1069; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
1070; GFX9-SDAG-NEXT:    s_mov_b32 s33, s32
1071; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x400
1072; GFX9-SDAG-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1073; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1074; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
1075; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
1076; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
1077; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1078; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB8_1
1079; GFX9-SDAG-NEXT:  ; %bb.2:
1080; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
1081; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1082; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
1083; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1084; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
1085; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
1086; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1087; GFX9-SDAG-NEXT:    s_mov_b32 s32, s33
1088; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
1089; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1090;
1091; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform:
1092; GFX9-GISEL:       ; %bb.0:
1093; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1094; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1095; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
1096; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1097; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
1098; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
1099; GFX9-GISEL-NEXT:    s_mov_b32 s33, s32
1100; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x400
1101; GFX9-GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1102; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1103; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
1104; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
1105; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
1106; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
1107; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
1108; GFX9-GISEL-NEXT:  ; %bb.2:
1109; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
1110; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
1111; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
1112; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
1113; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
1114; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
1115; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1116; GFX9-GISEL-NEXT:    s_mov_b32 s32, s33
1117; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
1118; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1119;
1120; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform:
1121; GFX11-SDAG:       ; %bb.0:
1122; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1124; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
1125; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
1126; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
1127; GFX11-SDAG-NEXT:    s_mov_b32 s33, s32
1128; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1129; GFX11-SDAG-NEXT:    s_add_i32 s32, s32, 16
1130; GFX11-SDAG-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1131; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
1132; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1133; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
1134; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
1135; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1136; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
1137; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
1138; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB8_1
1139; GFX11-SDAG-NEXT:  ; %bb.2:
1140; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
1141; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7b
1142; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
1143; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
1144; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1145; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1146; GFX11-SDAG-NEXT:    s_mov_b32 s32, s33
1147; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
1148; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1149;
1150; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform:
1151; GFX11-GISEL:       ; %bb.0:
1152; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1154; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
1155; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
1156; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1157; GFX11-GISEL-NEXT:    s_mov_b32 s33, s32
1158; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1159; GFX11-GISEL-NEXT:    s_add_i32 s32, s32, 16
1160; GFX11-GISEL-NEXT:  .LBB8_1: ; =>This Inner Loop Header: Depth=1
1161; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
1162; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1163; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
1164; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
1165; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1166; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
1167; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
1168; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB8_1
1169; GFX11-GISEL-NEXT:  ; %bb.2:
1170; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
1171; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
1172; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1173; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1174; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1175; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1176; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1177; GFX11-GISEL-NEXT:    s_mov_b32 s32, s33
1178; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
1179; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; No explicit alignment on the alloca; 123 is the 0x7b immediate stored in
  ; the checks above.
1180  %alloca = alloca i32, i32 %n, addrspace(5)
1181  store volatile i32 123, ptr addrspace(5) %alloca
1182  ret void
1183}
1184
; Non-kernel function that allocates %n x i32 with align 128 and performs one
; volatile store of 10 to it. In addition to the v_readlane_b32 / s_max_u32
; loop over exec, the checks expect:
;   - s33 to be set from s32 with an add/and pair (mask 0xffffe000 on GFX9,
;     0xffffff80 on GFX11);
;   - the alloca address to be rounded with add 0x1fff / and 0xffffe000 on
;     GFX9 and add 0xfff / and 0xfffff000 on GFX11;
;   - s34 to hold the incoming s32 and both s33 and s34 to be restored before
;     s_setpc_b64.
1185define void @test_dynamic_stackalloc_device_uniform_over_aligned(i32 %n) {
1186; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned:
1187; GFX9-SDAG:       ; %bb.0:
1188; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1189; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1190; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
1191; GFX9-SDAG-NEXT:    s_add_i32 s33, s32, 0x1fc0
1192; GFX9-SDAG-NEXT:    s_mov_b32 s10, s34
1193; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1194; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1195; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
1196; GFX9-SDAG-NEXT:    s_and_b32 s33, s33, 0xffffe000
1197; GFX9-SDAG-NEXT:    s_mov_b32 s34, s32
1198; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x4000
1199; GFX9-SDAG-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1200; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1201; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
1202; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
1203; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
1204; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1205; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB9_1
1206; GFX9-SDAG-NEXT:  ; %bb.2:
1207; GFX9-SDAG-NEXT:    s_add_i32 s4, s32, 0x1fff
1208; GFX9-SDAG-NEXT:    s_and_b32 s4, s4, 0xffffe000
1209; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1210; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, s6, 6, v0
1211; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
1212; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 10
1213; GFX9-SDAG-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
1214; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1215; GFX9-SDAG-NEXT:    s_mov_b32 s32, s34
1216; GFX9-SDAG-NEXT:    s_mov_b32 s34, s10
1217; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
1218; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1219;
1220; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned:
1221; GFX9-GISEL:       ; %bb.0:
1222; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1223; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1224; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
1225; GFX9-GISEL-NEXT:    s_add_i32 s33, s32, 0x1fc0
1226; GFX9-GISEL-NEXT:    s_mov_b32 s10, s34
1227; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1228; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
1229; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
1230; GFX9-GISEL-NEXT:    s_and_b32 s33, s33, 0xffffe000
1231; GFX9-GISEL-NEXT:    s_mov_b32 s34, s32
1232; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x4000
1233; GFX9-GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1234; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1235; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
1236; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
1237; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
1238; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
1239; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB9_1
1240; GFX9-GISEL-NEXT:  ; %bb.2:
1241; GFX9-GISEL-NEXT:    s_add_u32 s5, s32, 0x1fff
1242; GFX9-GISEL-NEXT:    s_lshl_b32 s4, s6, 6
1243; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, 0xffffe000
1244; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s4
1245; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 10
1246; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1247; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
1248; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1249; GFX9-GISEL-NEXT:    s_mov_b32 s32, s34
1250; GFX9-GISEL-NEXT:    s_mov_b32 s34, s10
1251; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
1252; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1253;
1254; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned:
1255; GFX11-SDAG:       ; %bb.0:
1256; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1257; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1258; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
1259; GFX11-SDAG-NEXT:    s_add_i32 s33, s32, 0x7f
1260; GFX11-SDAG-NEXT:    s_mov_b32 s5, s34
1261; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
1262; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1263; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
1264; GFX11-SDAG-NEXT:    s_and_b32 s33, s33, 0xffffff80
1265; GFX11-SDAG-NEXT:    s_mov_b32 s34, s32
1266; GFX11-SDAG-NEXT:    s_addk_i32 s32, 0x100
1267; GFX11-SDAG-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1268; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
1269; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1270; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
1271; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
1272; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
1273; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
1274; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB9_1
1275; GFX11-SDAG-NEXT:  ; %bb.2:
1276; GFX11-SDAG-NEXT:    s_add_i32 s1, s32, 0xfff
1277; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 10
1278; GFX11-SDAG-NEXT:    s_and_b32 s1, s1, 0xfffff000
1279; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
1280; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
1281; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
1282; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1283; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1284; GFX11-SDAG-NEXT:    s_mov_b32 s32, s34
1285; GFX11-SDAG-NEXT:    s_mov_b32 s34, s5
1286; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1287;
1288; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_over_aligned:
1289; GFX11-GISEL:       ; %bb.0:
1290; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1292; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
1293; GFX11-GISEL-NEXT:    s_add_i32 s33, s32, 0x7f
1294; GFX11-GISEL-NEXT:    s_mov_b32 s5, s34
1295; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
1296; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1297; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1298; GFX11-GISEL-NEXT:    s_and_b32 s33, s33, 0xffffff80
1299; GFX11-GISEL-NEXT:    s_mov_b32 s34, s32
1300; GFX11-GISEL-NEXT:    s_addk_i32 s32, 0x100
1301; GFX11-GISEL-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
1302; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
1303; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
1304; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
1305; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
1306; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
1307; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
1308; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB9_1
1309; GFX11-GISEL-NEXT:  ; %bb.2:
1310; GFX11-GISEL-NEXT:    s_add_u32 s1, s32, 0xfff
1311; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 10
1312; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1313; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, 0xfffff000
1314; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
1315; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1316; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1317; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1318; GFX11-GISEL-NEXT:    s_mov_b32 s32, s34
1319; GFX11-GISEL-NEXT:    s_mov_b32 s34, s5
1320; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; align 128 is the property under test here (see the function name).
1321  %alloca = alloca i32, i32 %n, align 128, addrspace(5)
1322  store volatile i32 10, ptr addrspace(5) %alloca
1323  ret void
1324}
1325
; Non-kernel function with a single dynamic alloca of %n i32 elements and a
; requested alignment of only 2, followed by a volatile store of 22 to it.
; What the assertions below pin down, for all four run configurations:
;   * %n is read from v0 and the byte size is rounded as (n * 4 + 15) & -16;
;   * a loop over the bits of exec uses v_readlane + s_max_u32 to take the
;     largest size across the active lanes;
;   * that maximum is shifted left by 6 (gfx900) or 5 (gfx1100) and added to
;     s32, while the store targets the value s32 had before the bump;
;   * s33 is saved in an SGPR and set to the incoming s32, and both s32 and
;     s33 are restored before s_setpc_b64. No s34 copy and no and-mask on
;     s32/s33 appear here, in contrast to the align-128 tests in this file.
; NOTE(review): the 6 / 5 shift presumably reflects a wave64 / wave32 scratch
; layout - confirm against the backend before relying on it.
1326define void @test_dynamic_stackalloc_device_uniform_under_aligned(i32 %n) {
1327; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned:
1328; GFX9-SDAG:       ; %bb.0:
1329; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1331; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
1332; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1333; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1334; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
1335; GFX9-SDAG-NEXT:    s_mov_b32 s33, s32
1336; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x400
1337; GFX9-SDAG-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
1338; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1339; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
1340; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
1341; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
1342; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1343; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB10_1
1344; GFX9-SDAG-NEXT:  ; %bb.2:
1345; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
1346; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1347; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
1348; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1349; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 22
1350; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
1351; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1352; GFX9-SDAG-NEXT:    s_mov_b32 s32, s33
1353; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
1354; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1355;
1356; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned:
1357; GFX9-GISEL:       ; %bb.0:
1358; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1359; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1360; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
1361; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1362; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
1363; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
1364; GFX9-GISEL-NEXT:    s_mov_b32 s33, s32
1365; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x400
1366; GFX9-GISEL-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
1367; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1368; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
1369; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
1370; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
1371; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
1372; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB10_1
1373; GFX9-GISEL-NEXT:  ; %bb.2:
1374; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
1375; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
1376; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
1377; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 22
1378; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
1379; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
1380; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1381; GFX9-GISEL-NEXT:    s_mov_b32 s32, s33
1382; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
1383; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1384;
1385; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned:
1386; GFX11-SDAG:       ; %bb.0:
1387; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1388; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1389; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
1390; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
1391; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
1392; GFX11-SDAG-NEXT:    s_mov_b32 s33, s32
1393; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1394; GFX11-SDAG-NEXT:    s_add_i32 s32, s32, 16
1395; GFX11-SDAG-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
1396; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
1397; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1398; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
1399; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
1400; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1401; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
1402; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
1403; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB10_1
1404; GFX11-SDAG-NEXT:  ; %bb.2:
1405; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
1406; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 22
1407; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
1408; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
1409; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1410; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1411; GFX11-SDAG-NEXT:    s_mov_b32 s32, s33
1412; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
1413; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1414;
1415; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_uniform_under_aligned:
1416; GFX11-GISEL:       ; %bb.0:
1417; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1418; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1419; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
1420; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
1421; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1422; GFX11-GISEL-NEXT:    s_mov_b32 s33, s32
1423; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1424; GFX11-GISEL-NEXT:    s_add_i32 s32, s32, 16
1425; GFX11-GISEL-NEXT:  .LBB10_1: ; =>This Inner Loop Header: Depth=1
1426; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
1427; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1428; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
1429; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
1430; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1431; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
1432; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
1433; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB10_1
1434; GFX11-GISEL-NEXT:  ; %bb.2:
1435; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 22
1436; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
1437; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1438; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1439; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1440; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1441; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1442; GFX11-GISEL-NEXT:    s_mov_b32 s32, s33
1443; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
1444; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1445  %alloca = alloca i32, i32 %n, align 2, addrspace(5)
1446  store volatile i32 22, ptr addrspace(5) %alloca
1447  ret void
1448}
1449
; Non-kernel function whose dynamic alloca element count is the result of
; @llvm.amdgcn.workitem.id.x(), with no explicit alignment on the alloca; a
; volatile store of 123 (0x7b) follows.
; What the assertions below pin down:
;   * the count is obtained by masking v31 with 0x3ff and then converted to a
;     byte size with v_lshl_add_u32 ..., 2, 15 followed by an and-mask;
;   * the SelectionDAG runs mask with 0x1ff0 whereas the GlobalISel runs mask
;     with -16 - this is the only difference in the size computation;
;   * a loop over the bits of exec uses v_readlane + s_max_u32 to take the
;     largest size across the active lanes;
;   * the maximum is shifted left by 6 (gfx900) or 5 (gfx1100) and added to
;     s32; the store goes to the value s32 had before the bump;
;   * s32 and s33 are restored before s_setpc_b64.
; NOTE(review): 0x1ff0 looks like -16 narrowed by the known 10-bit range of
; the work-item id - confirm.
1450define void @test_dynamic_stackalloc_device_divergent() {
1451; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent:
1452; GFX9-SDAG:       ; %bb.0:
1453; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1454; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1455; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1456; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
1457; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
1458; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1459; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
1460; GFX9-SDAG-NEXT:    s_mov_b32 s33, s32
1461; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x400
1462; GFX9-SDAG-NEXT:  .LBB11_1: ; =>This Inner Loop Header: Depth=1
1463; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1464; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
1465; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
1466; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
1467; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1468; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB11_1
1469; GFX9-SDAG-NEXT:  ; %bb.2:
1470; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
1471; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1472; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
1473; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1474; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7b
1475; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
1476; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1477; GFX9-SDAG-NEXT:    s_mov_b32 s32, s33
1478; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
1479; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1480;
1481; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent:
1482; GFX9-GISEL:       ; %bb.0:
1483; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1484; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1485; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1486; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
1487; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1488; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
1489; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
1490; GFX9-GISEL-NEXT:    s_mov_b32 s33, s32
1491; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x400
1492; GFX9-GISEL-NEXT:  .LBB11_1: ; =>This Inner Loop Header: Depth=1
1493; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1494; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
1495; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
1496; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
1497; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
1498; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB11_1
1499; GFX9-GISEL-NEXT:  ; %bb.2:
1500; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
1501; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
1502; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
1503; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
1504; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
1505; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
1506; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1507; GFX9-GISEL-NEXT:    s_mov_b32 s32, s33
1508; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
1509; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1510;
1511; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent:
1512; GFX11-SDAG:       ; %bb.0:
1513; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1514; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1515; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
1516; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
1517; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
1518; GFX11-SDAG-NEXT:    s_mov_b32 s33, s32
1519; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1520; GFX11-SDAG-NEXT:    s_add_i32 s32, s32, 16
1521; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1522; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
1523; GFX11-SDAG-NEXT:  .LBB11_1: ; =>This Inner Loop Header: Depth=1
1524; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
1525; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1526; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
1527; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
1528; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1529; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
1530; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
1531; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB11_1
1532; GFX11-SDAG-NEXT:  ; %bb.2:
1533; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
1534; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7b
1535; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
1536; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
1537; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1538; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1539; GFX11-SDAG-NEXT:    s_mov_b32 s32, s33
1540; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
1541; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1542;
1543; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent:
1544; GFX11-GISEL:       ; %bb.0:
1545; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1546; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1547; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
1548; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
1549; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1550; GFX11-GISEL-NEXT:    s_mov_b32 s33, s32
1551; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1552; GFX11-GISEL-NEXT:    s_add_i32 s32, s32, 16
1553; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1554; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1555; GFX11-GISEL-NEXT:  .LBB11_1: ; =>This Inner Loop Header: Depth=1
1556; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
1557; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1558; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
1559; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
1560; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1561; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
1562; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
1563; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB11_1
1564; GFX11-GISEL-NEXT:  ; %bb.2:
1565; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
1566; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
1567; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1568; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1569; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1570; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1571; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1572; GFX11-GISEL-NEXT:    s_mov_b32 s32, s33
1573; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
1574; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1575  %idx = call i32 @llvm.amdgcn.workitem.id.x()
1576  %alloca = alloca i32, i32 %idx, addrspace(5)
1577  store volatile i32 123, ptr addrspace(5) %alloca
1578  ret void
1579}
1580
; Non-kernel function whose dynamic alloca element count is the result of
; @llvm.amdgcn.workitem.id.x() and whose requested alignment is 128; a
; volatile store of 444 (0x1bc) follows.
; What the assertions below pin down, beyond the size computation and the
; exec-driven readlane / s_max_u32 loop shared with the other divergent tests:
;   * s33 is realigned in the prologue: (s32 + 0x1fc0) & 0xffffe000 on gfx900
;     and (s32 + 0x7f) & 0xffffff80 on gfx1100;
;   * the incoming s32 is kept in s34 (whose old value is saved in an SGPR),
;     and the epilogue restores s32 from s34 rather than from s33;
;   * the address used for the store is s32 rounded up after the fixed bump:
;     (s32 + 0x1fff) & 0xffffe000 on gfx900, (s32 + 0xfff) & 0xfffff000 on
;     gfx1100; the new s32 is that aligned address plus the wave-wide maximum
;     size shifted left by 6 (gfx900) or 5 (gfx1100);
;   * on gfx900 the SelectionDAG run stores through a VGPR offset (offen)
;     holding the aligned address, as the GlobalISel run does.
; NOTE(review): the masks are consistent with 128 scaled by a wave size of
; 64 / 32 - confirm that interpretation.
1581define void @test_dynamic_stackalloc_device_divergent_over_aligned() {
1582; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned:
1583; GFX9-SDAG:       ; %bb.0:
1584; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585; GFX9-SDAG-NEXT:    s_mov_b32 s10, s33
1586; GFX9-SDAG-NEXT:    s_add_i32 s33, s32, 0x1fc0
1587; GFX9-SDAG-NEXT:    s_mov_b32 s11, s34
1588; GFX9-SDAG-NEXT:    s_mov_b32 s34, s32
1589; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x4000
1590; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1591; GFX9-SDAG-NEXT:    s_add_i32 s4, s32, 0x1fff
1592; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1593; GFX9-SDAG-NEXT:    s_and_b32 s6, s4, 0xffffe000
1594; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
1595; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1596; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0
1597; GFX9-SDAG-NEXT:    s_and_b32 s33, s33, 0xffffe000
1598; GFX9-SDAG-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
1599; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s8, s[4:5]
1600; GFX9-SDAG-NEXT:    v_readlane_b32 s9, v0, s8
1601; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s8
1602; GFX9-SDAG-NEXT:    s_max_u32 s7, s7, s9
1603; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1604; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB12_1
1605; GFX9-SDAG-NEXT:  ; %bb.2:
1606; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s6
1607; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, s7, 6, v0
1608; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
1609; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1bc
1610; GFX9-SDAG-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
1611; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1612; GFX9-SDAG-NEXT:    s_mov_b32 s32, s34
1613; GFX9-SDAG-NEXT:    s_mov_b32 s34, s11
1614; GFX9-SDAG-NEXT:    s_mov_b32 s33, s10
1615; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1616;
1617; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned:
1618; GFX9-GISEL:       ; %bb.0:
1619; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1620; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1621; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1622; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
1623; GFX9-GISEL-NEXT:    s_add_i32 s33, s32, 0x1fc0
1624; GFX9-GISEL-NEXT:    s_mov_b32 s10, s34
1625; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1626; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
1627; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
1628; GFX9-GISEL-NEXT:    s_and_b32 s33, s33, 0xffffe000
1629; GFX9-GISEL-NEXT:    s_mov_b32 s34, s32
1630; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x4000
1631; GFX9-GISEL-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
1632; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1633; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
1634; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
1635; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
1636; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
1637; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB12_1
1638; GFX9-GISEL-NEXT:  ; %bb.2:
1639; GFX9-GISEL-NEXT:    s_add_u32 s5, s32, 0x1fff
1640; GFX9-GISEL-NEXT:    s_lshl_b32 s4, s6, 6
1641; GFX9-GISEL-NEXT:    s_and_b32 s5, s5, 0xffffe000
1642; GFX9-GISEL-NEXT:    s_add_u32 s32, s5, s4
1643; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x1bc
1644; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s5
1645; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
1646; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1647; GFX9-GISEL-NEXT:    s_mov_b32 s32, s34
1648; GFX9-GISEL-NEXT:    s_mov_b32 s34, s10
1649; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
1650; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1651;
1652; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned:
1653; GFX11-SDAG:       ; %bb.0:
1654; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1655; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1656; GFX11-SDAG-NEXT:    s_mov_b32 s5, s33
1657; GFX11-SDAG-NEXT:    s_add_i32 s33, s32, 0x7f
1658; GFX11-SDAG-NEXT:    s_mov_b32 s6, s34
1659; GFX11-SDAG-NEXT:    s_mov_b32 s34, s32
1660; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1661; GFX11-SDAG-NEXT:    s_addk_i32 s32, 0x100
1662; GFX11-SDAG-NEXT:    s_mov_b32 s2, exec_lo
1663; GFX11-SDAG-NEXT:    s_add_i32 s0, s32, 0xfff
1664; GFX11-SDAG-NEXT:    s_mov_b32 s1, 0
1665; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
1666; GFX11-SDAG-NEXT:    s_and_b32 s0, s0, 0xfffff000
1667; GFX11-SDAG-NEXT:    s_and_b32 s33, s33, 0xffffff80
1668; GFX11-SDAG-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
1669; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s3, s2
1670; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1671; GFX11-SDAG-NEXT:    v_readlane_b32 s4, v0, s3
1672; GFX11-SDAG-NEXT:    s_bitset0_b32 s2, s3
1673; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1674; GFX11-SDAG-NEXT:    s_max_u32 s1, s1, s4
1675; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
1676; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB12_1
1677; GFX11-SDAG-NEXT:  ; %bb.2:
1678; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s1, 5, s0
1679; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x1bc
1680; GFX11-SDAG-NEXT:    s_mov_b32 s33, s5
1681; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1682; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1683; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s0 dlc
1684; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1685; GFX11-SDAG-NEXT:    s_mov_b32 s32, s34
1686; GFX11-SDAG-NEXT:    s_mov_b32 s34, s6
1687; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1688;
1689; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_over_aligned:
1690; GFX11-GISEL:       ; %bb.0:
1691; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1692; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1693; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
1694; GFX11-GISEL-NEXT:    s_add_i32 s33, s32, 0x7f
1695; GFX11-GISEL-NEXT:    s_mov_b32 s5, s34
1696; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
1697; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1698; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1699; GFX11-GISEL-NEXT:    s_and_b32 s33, s33, 0xffffff80
1700; GFX11-GISEL-NEXT:    s_mov_b32 s34, s32
1701; GFX11-GISEL-NEXT:    s_addk_i32 s32, 0x100
1702; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1703; GFX11-GISEL-NEXT:  .LBB12_1: ; =>This Inner Loop Header: Depth=1
1704; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
1705; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1706; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
1707; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
1708; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1709; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
1710; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
1711; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB12_1
1712; GFX11-GISEL-NEXT:  ; %bb.2:
1713; GFX11-GISEL-NEXT:    s_add_u32 s1, s32, 0xfff
1714; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x1bc
1715; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1716; GFX11-GISEL-NEXT:    s_and_b32 s1, s1, 0xfffff000
1717; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
1718; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1719; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1720; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1721; GFX11-GISEL-NEXT:    s_mov_b32 s32, s34
1722; GFX11-GISEL-NEXT:    s_mov_b32 s34, s5
1723; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1724  %idx = call i32 @llvm.amdgcn.workitem.id.x()
1725  %alloca = alloca i32, i32 %idx, align 128, addrspace(5)
1726  store volatile i32 444, ptr addrspace(5) %alloca
1727  ret void
1728}
1729
; Non-kernel function whose dynamic alloca element count is the result of
; @llvm.amdgcn.workitem.id.x() and whose requested alignment is only 2; a
; volatile store of 666 (0x29a) follows.
; The instruction sequence asserted below matches the one asserted for
; test_dynamic_stackalloc_device_divergent (no explicit alignment) apart from
; the stored constant and the local label numbers:
;   * the count comes from v31 & 0x3ff and the byte size is
;     v_lshl_add_u32 ..., 2, 15 followed by an and-mask (0x1ff0 in the
;     SelectionDAG runs, -16 in the GlobalISel runs);
;   * a loop over the bits of exec uses v_readlane + s_max_u32 to take the
;     largest size across the active lanes;
;   * the maximum is shifted left by 6 (gfx900) or 5 (gfx1100) and added to
;     s32; the store goes to the value s32 had before the bump;
;   * no s34 copy and no and-mask on s32/s33 are emitted; s32 and s33 are
;     restored before s_setpc_b64.
1730define void @test_dynamic_stackalloc_device_divergent_under_aligned() {
1731; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned:
1732; GFX9-SDAG:       ; %bb.0:
1733; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1734; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1735; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1736; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
1737; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
1738; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1739; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
1740; GFX9-SDAG-NEXT:    s_mov_b32 s33, s32
1741; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x400
1742; GFX9-SDAG-NEXT:  .LBB13_1: ; =>This Inner Loop Header: Depth=1
1743; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1744; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
1745; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
1746; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
1747; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1748; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB13_1
1749; GFX9-SDAG-NEXT:  ; %bb.2:
1750; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
1751; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1752; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
1753; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1754; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x29a
1755; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
1756; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1757; GFX9-SDAG-NEXT:    s_mov_b32 s32, s33
1758; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
1759; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1760;
1761; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned:
1762; GFX9-GISEL:       ; %bb.0:
1763; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1765; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1766; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
1767; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1768; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
1769; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
1770; GFX9-GISEL-NEXT:    s_mov_b32 s33, s32
1771; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x400
1772; GFX9-GISEL-NEXT:  .LBB13_1: ; =>This Inner Loop Header: Depth=1
1773; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
1774; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
1775; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
1776; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
1777; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
1778; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB13_1
1779; GFX9-GISEL-NEXT:  ; %bb.2:
1780; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
1781; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
1782; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
1783; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
1784; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
1785; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
1786; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1787; GFX9-GISEL-NEXT:    s_mov_b32 s32, s33
1788; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
1789; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
1790;
1791; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned:
1792; GFX11-SDAG:       ; %bb.0:
1793; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1795; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
1796; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
1797; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
1798; GFX11-SDAG-NEXT:    s_mov_b32 s33, s32
1799; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1800; GFX11-SDAG-NEXT:    s_add_i32 s32, s32, 16
1801; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1802; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
1803; GFX11-SDAG-NEXT:  .LBB13_1: ; =>This Inner Loop Header: Depth=1
1804; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
1805; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1806; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
1807; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
1808; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1809; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
1810; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
1811; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB13_1
1812; GFX11-SDAG-NEXT:  ; %bb.2:
1813; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
1814; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x29a
1815; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
1816; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
1817; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
1818; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1819; GFX11-SDAG-NEXT:    s_mov_b32 s32, s33
1820; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
1821; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
1822;
1823; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_under_aligned:
1824; GFX11-GISEL:       ; %bb.0:
1825; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1826; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
1827; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
1828; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
1829; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
1830; GFX11-GISEL-NEXT:    s_mov_b32 s33, s32
1831; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1832; GFX11-GISEL-NEXT:    s_add_i32 s32, s32, 16
1833; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1834; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
1835; GFX11-GISEL-NEXT:  .LBB13_1: ; =>This Inner Loop Header: Depth=1
1836; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
1837; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
1838; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
1839; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
1840; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1841; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
1842; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
1843; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB13_1
1844; GFX11-GISEL-NEXT:  ; %bb.2:
1845; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
1846; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
1847; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
1848; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1849; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
1850; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
1851; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
1852; GFX11-GISEL-NEXT:    s_mov_b32 s32, s33
1853; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
1854; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
1855  %idx = call i32 @llvm.amdgcn.workitem.id.x()
1856  %alloca = alloca i32, i32 %idx, align 2, addrspace(5)
1857  store volatile i32 666, ptr addrspace(5) %alloca
1858  ret void
1859}
1860
1861define void @test_dynamic_stackalloc_device_multiple_allocas(i32 %n, i32 %m) {
1862; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
1863; GFX9-SDAG:       ; %bb.0: ; %entry
1864; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1865; GFX9-SDAG-NEXT:    s_mov_b32 s13, s33
1866; GFX9-SDAG-NEXT:    s_add_i32 s33, s32, 0xfc0
1867; GFX9-SDAG-NEXT:    s_mov_b32 s14, s34
1868; GFX9-SDAG-NEXT:    s_mov_b32 s8, 0
1869; GFX9-SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1870; GFX9-SDAG-NEXT:    s_and_b32 s33, s33, 0xfffff000
1871; GFX9-SDAG-NEXT:    s_mov_b32 s34, s32
1872; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x3000
1873; GFX9-SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1874; GFX9-SDAG-NEXT:    s_cbranch_execz .LBB14_6
1875; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb.0
1876; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
1877; GFX9-SDAG-NEXT:    v_and_b32_e32 v1, -16, v1
1878; GFX9-SDAG-NEXT:    s_mov_b64 s[6:7], exec
1879; GFX9-SDAG-NEXT:    s_mov_b32 s10, 0
1880; GFX9-SDAG-NEXT:  .LBB14_2: ; =>This Inner Loop Header: Depth=1
1881; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s9, s[6:7]
1882; GFX9-SDAG-NEXT:    v_readlane_b32 s11, v1, s9
1883; GFX9-SDAG-NEXT:    s_bitset0_b64 s[6:7], s9
1884; GFX9-SDAG-NEXT:    s_max_u32 s10, s10, s11
1885; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[6:7], 0
1886; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB14_2
1887; GFX9-SDAG-NEXT:  ; %bb.3:
1888; GFX9-SDAG-NEXT:    s_add_i32 s6, s32, 0xfff
1889; GFX9-SDAG-NEXT:    s_and_b32 s9, s6, 0xfffff000
1890; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s9
1891; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, s10, 6, v1
1892; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
1893; GFX9-SDAG-NEXT:    v_and_b32_e32 v1, 0x3ff, v31
1894; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
1895; GFX9-SDAG-NEXT:    v_and_b32_e32 v1, 0x1ff0, v1
1896; GFX9-SDAG-NEXT:    s_mov_b64 s[6:7], exec
1897; GFX9-SDAG-NEXT:    s_mov_b32 s10, 0
1898; GFX9-SDAG-NEXT:  .LBB14_4: ; =>This Inner Loop Header: Depth=1
1899; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s11, s[6:7]
1900; GFX9-SDAG-NEXT:    v_readlane_b32 s12, v1, s11
1901; GFX9-SDAG-NEXT:    s_bitset0_b64 s[6:7], s11
1902; GFX9-SDAG-NEXT:    s_max_u32 s10, s10, s12
1903; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[6:7], 0
1904; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB14_4
1905; GFX9-SDAG-NEXT:  ; %bb.5:
1906; GFX9-SDAG-NEXT:    s_mov_b32 s6, s32
1907; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s6
1908; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, s10, 6, v1
1909; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
1910; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 3
1911; GFX9-SDAG-NEXT:    v_mov_b32_e32 v2, s9
1912; GFX9-SDAG-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1913; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1914; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 4
1915; GFX9-SDAG-NEXT:    buffer_store_dword v1, off, s[0:3], s6
1916; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1917; GFX9-SDAG-NEXT:  .LBB14_6: ; %bb.1
1918; GFX9-SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
1919; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
1920; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 2
1921; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
1922; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
1923; GFX9-SDAG-NEXT:  .LBB14_7: ; =>This Inner Loop Header: Depth=1
1924; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s6, s[4:5]
1925; GFX9-SDAG-NEXT:    v_readlane_b32 s7, v0, s6
1926; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s6
1927; GFX9-SDAG-NEXT:    s_max_u32 s8, s8, s7
1928; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
1929; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB14_7
1930; GFX9-SDAG-NEXT:  ; %bb.8:
1931; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
1932; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
1933; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s8, 6, v0
1934; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
1935; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 1
1936; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s33
1937; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1938; GFX9-SDAG-NEXT:    buffer_store_dword v1, off, s[0:3], s4
1939; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
1940; GFX9-SDAG-NEXT:    s_mov_b32 s32, s34
1941; GFX9-SDAG-NEXT:    s_mov_b32 s34, s14
1942; GFX9-SDAG-NEXT:    s_mov_b32 s33, s13
1943; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
1944;
1945; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
1946; GFX9-GISEL:       ; %bb.0: ; %entry
1947; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1948; GFX9-GISEL-NEXT:    s_mov_b32 s13, s33
1949; GFX9-GISEL-NEXT:    s_add_i32 s33, s32, 0xfc0
1950; GFX9-GISEL-NEXT:    s_mov_b32 s14, s34
1951; GFX9-GISEL-NEXT:    s_mov_b32 s8, 0
1952; GFX9-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1953; GFX9-GISEL-NEXT:    s_and_b32 s33, s33, 0xfffff000
1954; GFX9-GISEL-NEXT:    s_mov_b32 s34, s32
1955; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x3000
1956; GFX9-GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1957; GFX9-GISEL-NEXT:    s_cbranch_execz .LBB14_6
1958; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb.0
1959; GFX9-GISEL-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
1960; GFX9-GISEL-NEXT:    v_and_b32_e32 v2, 0x3ff, v31
1961; GFX9-GISEL-NEXT:    v_and_b32_e32 v1, -16, v1
1962; GFX9-GISEL-NEXT:    s_mov_b64 s[6:7], exec
1963; GFX9-GISEL-NEXT:    s_mov_b32 s9, 0
1964; GFX9-GISEL-NEXT:  .LBB14_2: ; =>This Inner Loop Header: Depth=1
1965; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s10, s[6:7]
1966; GFX9-GISEL-NEXT:    v_readlane_b32 s11, v1, s10
1967; GFX9-GISEL-NEXT:    s_bitset0_b64 s[6:7], s10
1968; GFX9-GISEL-NEXT:    s_max_u32 s9, s9, s11
1969; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
1970; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB14_2
1971; GFX9-GISEL-NEXT:  ; %bb.3:
1972; GFX9-GISEL-NEXT:    s_add_u32 s7, s32, 0xfff
1973; GFX9-GISEL-NEXT:    s_lshl_b32 s6, s9, 6
1974; GFX9-GISEL-NEXT:    s_and_b32 s9, s7, 0xfffff000
1975; GFX9-GISEL-NEXT:    v_lshl_add_u32 v1, v2, 2, 15
1976; GFX9-GISEL-NEXT:    s_add_u32 s32, s9, s6
1977; GFX9-GISEL-NEXT:    v_and_b32_e32 v1, -16, v1
1978; GFX9-GISEL-NEXT:    s_mov_b64 s[6:7], exec
1979; GFX9-GISEL-NEXT:    s_mov_b32 s10, 0
1980; GFX9-GISEL-NEXT:  .LBB14_4: ; =>This Inner Loop Header: Depth=1
1981; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s11, s[6:7]
1982; GFX9-GISEL-NEXT:    v_readlane_b32 s12, v1, s11
1983; GFX9-GISEL-NEXT:    s_bitset0_b64 s[6:7], s11
1984; GFX9-GISEL-NEXT:    s_max_u32 s10, s10, s12
1985; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
1986; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB14_4
1987; GFX9-GISEL-NEXT:  ; %bb.5:
1988; GFX9-GISEL-NEXT:    s_mov_b32 s6, s32
1989; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 3
1990; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, s9
1991; GFX9-GISEL-NEXT:    s_lshl_b32 s7, s10, 6
1992; GFX9-GISEL-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1993; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1994; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 4
1995; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, s6
1996; GFX9-GISEL-NEXT:    s_add_u32 s32, s6, s7
1997; GFX9-GISEL-NEXT:    buffer_store_dword v1, v2, s[0:3], 0 offen
1998; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
1999; GFX9-GISEL-NEXT:  .LBB14_6: ; %bb.1
2000; GFX9-GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
2001; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2002; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2003; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
2004; GFX9-GISEL-NEXT:  .LBB14_7: ; =>This Inner Loop Header: Depth=1
2005; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s6, s[4:5]
2006; GFX9-GISEL-NEXT:    v_readlane_b32 s7, v0, s6
2007; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s6
2008; GFX9-GISEL-NEXT:    s_max_u32 s8, s8, s7
2009; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
2010; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB14_7
2011; GFX9-GISEL-NEXT:  ; %bb.8:
2012; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
2013; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s8, 6
2014; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 1
2015; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
2016; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], s33
2017; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2018; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 2
2019; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
2020; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
2021; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2022; GFX9-GISEL-NEXT:    s_mov_b32 s32, s34
2023; GFX9-GISEL-NEXT:    s_mov_b32 s34, s14
2024; GFX9-GISEL-NEXT:    s_mov_b32 s33, s13
2025; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
2026;
2027; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
2028; GFX11-SDAG:       ; %bb.0: ; %entry
2029; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2030; GFX11-SDAG-NEXT:    s_mov_b32 s7, s33
2031; GFX11-SDAG-NEXT:    s_add_i32 s33, s32, 63
2032; GFX11-SDAG-NEXT:    s_mov_b32 s8, s34
2033; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
2034; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
2035; GFX11-SDAG-NEXT:    s_and_not1_b32 s33, s33, 63
2036; GFX11-SDAG-NEXT:    s_mov_b32 s34, s32
2037; GFX11-SDAG-NEXT:    s_addk_i32 s32, 0xc0
2038; GFX11-SDAG-NEXT:    v_cmpx_eq_u32_e32 0, v0
2039; GFX11-SDAG-NEXT:    s_cbranch_execz .LBB14_6
2040; GFX11-SDAG-NEXT:  ; %bb.1: ; %bb.0
2041; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
2042; GFX11-SDAG-NEXT:    s_mov_b32 s2, exec_lo
2043; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0
2044; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2045; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, -16, v1
2046; GFX11-SDAG-NEXT:  .LBB14_2: ; =>This Inner Loop Header: Depth=1
2047; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s4, s2
2048; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2049; GFX11-SDAG-NEXT:    v_readlane_b32 s5, v1, s4
2050; GFX11-SDAG-NEXT:    s_bitset0_b32 s2, s4
2051; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2052; GFX11-SDAG-NEXT:    s_max_u32 s3, s3, s5
2053; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
2054; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB14_2
2055; GFX11-SDAG-NEXT:  ; %bb.3:
2056; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, 0x3ff, v31
2057; GFX11-SDAG-NEXT:    s_add_i32 s2, s32, 0x7ff
2058; GFX11-SDAG-NEXT:    s_mov_b32 s4, exec_lo
2059; GFX11-SDAG-NEXT:    s_and_b32 s2, s2, 0xfffff800
2060; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
2061; GFX11-SDAG-NEXT:    v_lshl_add_u32 v2, s3, 5, s2
2062; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
2063; GFX11-SDAG-NEXT:    s_mov_b32 s3, 0
2064; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v2
2065; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2066; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, 0x1ff0, v1
2067; GFX11-SDAG-NEXT:  .LBB14_4: ; =>This Inner Loop Header: Depth=1
2068; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s5, s4
2069; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2070; GFX11-SDAG-NEXT:    v_readlane_b32 s6, v1, s5
2071; GFX11-SDAG-NEXT:    s_bitset0_b32 s4, s5
2072; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2073; GFX11-SDAG-NEXT:    s_max_u32 s3, s3, s6
2074; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s4, 0
2075; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB14_4
2076; GFX11-SDAG-NEXT:  ; %bb.5:
2077; GFX11-SDAG-NEXT:    s_mov_b32 s4, s32
2078; GFX11-SDAG-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
2079; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, s3, 5, s4
2080; GFX11-SDAG-NEXT:    scratch_store_b32 off, v2, s2 dlc
2081; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2082; GFX11-SDAG-NEXT:    scratch_store_b32 off, v3, s4 dlc
2083; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2084; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
2085; GFX11-SDAG-NEXT:  .LBB14_6: ; %bb.1
2086; GFX11-SDAG-NEXT:    s_or_b32 exec_lo, exec_lo, s1
2087; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, v0, 2, 15
2088; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 2
2089; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
2090; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2091; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, -16, v1
2092; GFX11-SDAG-NEXT:  .LBB14_7: ; =>This Inner Loop Header: Depth=1
2093; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
2094; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2095; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v1, s2
2096; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
2097; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2098; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
2099; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
2100; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB14_7
2101; GFX11-SDAG-NEXT:  ; %bb.8:
2102; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
2103; GFX11-SDAG-NEXT:    v_mov_b32_e32 v2, 1
2104; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, s0, 5, s1
2105; GFX11-SDAG-NEXT:    scratch_store_b32 off, v2, s33 dlc
2106; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2107; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s1 dlc
2108; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2109; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
2110; GFX11-SDAG-NEXT:    s_mov_b32 s32, s34
2111; GFX11-SDAG-NEXT:    s_mov_b32 s34, s8
2112; GFX11-SDAG-NEXT:    s_mov_b32 s33, s7
2113; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
2114;
2115; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_multiple_allocas:
2116; GFX11-GISEL:       ; %bb.0: ; %entry
2117; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2118; GFX11-GISEL-NEXT:    s_mov_b32 s7, s33
2119; GFX11-GISEL-NEXT:    s_add_i32 s33, s32, 63
2120; GFX11-GISEL-NEXT:    s_mov_b32 s8, s34
2121; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
2122; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
2123; GFX11-GISEL-NEXT:    s_and_not1_b32 s33, s33, 63
2124; GFX11-GISEL-NEXT:    s_mov_b32 s34, s32
2125; GFX11-GISEL-NEXT:    s_addk_i32 s32, 0xc0
2126; GFX11-GISEL-NEXT:    v_cmpx_eq_u32_e32 0, v0
2127; GFX11-GISEL-NEXT:    s_cbranch_execz .LBB14_6
2128; GFX11-GISEL-NEXT:  ; %bb.1: ; %bb.0
2129; GFX11-GISEL-NEXT:    v_lshl_add_u32 v2, v1, 2, 15
2130; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0x3ff, v31
2131; GFX11-GISEL-NEXT:    s_mov_b32 s3, exec_lo
2132; GFX11-GISEL-NEXT:    s_mov_b32 s2, 0
2133; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2134; GFX11-GISEL-NEXT:    v_and_b32_e32 v2, -16, v2
2135; GFX11-GISEL-NEXT:  .LBB14_2: ; =>This Inner Loop Header: Depth=1
2136; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s4, s3
2137; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2138; GFX11-GISEL-NEXT:    v_readlane_b32 s5, v2, s4
2139; GFX11-GISEL-NEXT:    s_bitset0_b32 s3, s4
2140; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2141; GFX11-GISEL-NEXT:    s_max_u32 s2, s2, s5
2142; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s3, 0
2143; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB14_2
2144; GFX11-GISEL-NEXT:  ; %bb.3:
2145; GFX11-GISEL-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
2146; GFX11-GISEL-NEXT:    s_lshl_b32 s5, s2, 5
2147; GFX11-GISEL-NEXT:    s_add_u32 s2, s32, 0x7ff
2148; GFX11-GISEL-NEXT:    s_mov_b32 s4, exec_lo
2149; GFX11-GISEL-NEXT:    s_and_b32 s2, s2, 0xfffff800
2150; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, -16, v1
2151; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0
2152; GFX11-GISEL-NEXT:    s_add_u32 s32, s2, s5
2153; GFX11-GISEL-NEXT:  .LBB14_4: ; =>This Inner Loop Header: Depth=1
2154; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s5, s4
2155; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2156; GFX11-GISEL-NEXT:    v_readlane_b32 s6, v1, s5
2157; GFX11-GISEL-NEXT:    s_bitset0_b32 s4, s5
2158; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2159; GFX11-GISEL-NEXT:    s_max_u32 s3, s3, s6
2160; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s4, 0
2161; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB14_4
2162; GFX11-GISEL-NEXT:  ; %bb.5:
2163; GFX11-GISEL-NEXT:    v_dual_mov_b32 v1, 3 :: v_dual_mov_b32 v2, 4
2164; GFX11-GISEL-NEXT:    s_mov_b32 s4, s32
2165; GFX11-GISEL-NEXT:    s_lshl_b32 s3, s3, 5
2166; GFX11-GISEL-NEXT:    scratch_store_b32 off, v1, s2 dlc
2167; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2168; GFX11-GISEL-NEXT:    scratch_store_b32 off, v2, s4 dlc
2169; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2170; GFX11-GISEL-NEXT:    s_add_u32 s32, s4, s3
2171; GFX11-GISEL-NEXT:  .LBB14_6: ; %bb.1
2172; GFX11-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
2173; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2174; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
2175; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2176; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2177; GFX11-GISEL-NEXT:  .LBB14_7: ; =>This Inner Loop Header: Depth=1
2178; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
2179; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2180; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
2181; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
2182; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2183; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
2184; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
2185; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB14_7
2186; GFX11-GISEL-NEXT:  ; %bb.8:
2187; GFX11-GISEL-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
2188; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
2189; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
2190; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s33 dlc
2191; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2192; GFX11-GISEL-NEXT:    scratch_store_b32 off, v1, s1 dlc
2193; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2194; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
2195; GFX11-GISEL-NEXT:    s_mov_b32 s32, s34
2196; GFX11-GISEL-NEXT:    s_mov_b32 s34, s8
2197; GFX11-GISEL-NEXT:    s_mov_b32 s33, s7
2198; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
2199entry:
2200  %cond = icmp eq i32 %n, 0
2201  %alloca1 = alloca i32, i32 8, addrspace(5)
2202  %alloca2 = alloca i32, i32 %n, addrspace(5)
2203  br i1 %cond, label %bb.0, label %bb.1
2204bb.0:
2205  %idx = call i32 @llvm.amdgcn.workitem.id.x()
2206  %alloca3 = alloca i32, i32 %m, align 64, addrspace(5)
2207  %alloca4 = alloca i32, i32 %idx, align 4, addrspace(5)
2208  store volatile i32 3, ptr addrspace(5) %alloca3
2209  store volatile i32 4, ptr addrspace(5) %alloca4
2210  br label %bb.1
2211bb.1:
2212  store volatile i32 1, ptr addrspace(5) %alloca1
2213  store volatile i32 2, ptr addrspace(5) %alloca2
2214  ret void
2215}
2216
; Non-kernel function with one dynamic alloca on each side of a branch on %n:
; bb.0 (taken when %n == 0) sizes its alloca by the workitem id, bb.1 sizes an
; align-64 alloca by %m. Both paths store to their alloca and join at bb.2,
; which only returns.
; For all four RUN configurations the expected output contains a
; readlane/s_max_u32 loop over the exec mask ahead of each s32 bump, and s32 is
; reloaded from s34 before the return.
2217define void @test_dynamic_stackalloc_device_control_flow(i32 %n, i32 %m) {
2218; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow:
2219; GFX9-SDAG:       ; %bb.0: ; %entry
2220; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2221; GFX9-SDAG-NEXT:    s_mov_b32 s11, s33
2222; GFX9-SDAG-NEXT:    s_add_i32 s33, s32, 0xfc0
2223; GFX9-SDAG-NEXT:    s_mov_b32 s12, s34
2224; GFX9-SDAG-NEXT:    s_mov_b32 s8, 0
2225; GFX9-SDAG-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
2226; GFX9-SDAG-NEXT:    s_and_b32 s33, s33, 0xfffff000
2227; GFX9-SDAG-NEXT:    s_mov_b32 s34, s32
2228; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x2000
2229; GFX9-SDAG-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2230; GFX9-SDAG-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
2231; GFX9-SDAG-NEXT:    s_cbranch_execz .LBB15_4
2232; GFX9-SDAG-NEXT:  ; %bb.1: ; %bb.1
2233; GFX9-SDAG-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
2234; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 2
2235; GFX9-SDAG-NEXT:    v_and_b32_e32 v1, -16, v1
2236; GFX9-SDAG-NEXT:    s_mov_b64 s[6:7], exec
2237; GFX9-SDAG-NEXT:  .LBB15_2: ; =>This Inner Loop Header: Depth=1
2238; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s9, s[6:7]
2239; GFX9-SDAG-NEXT:    v_readlane_b32 s10, v1, s9
2240; GFX9-SDAG-NEXT:    s_bitset0_b64 s[6:7], s9
2241; GFX9-SDAG-NEXT:    s_max_u32 s8, s8, s10
2242; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[6:7], 0
2243; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB15_2
2244; GFX9-SDAG-NEXT:  ; %bb.3:
2245; GFX9-SDAG-NEXT:    s_add_i32 s6, s32, 0xfff
2246; GFX9-SDAG-NEXT:    s_and_b32 s6, s6, 0xfffff000
2247; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, s6
2248; GFX9-SDAG-NEXT:    v_lshl_add_u32 v2, s8, 6, v1
2249; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v2
2250; GFX9-SDAG-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
2251; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2252; GFX9-SDAG-NEXT:    ; implicit-def: $vgpr31
2253; GFX9-SDAG-NEXT:  .LBB15_4: ; %Flow
2254; GFX9-SDAG-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
2255; GFX9-SDAG-NEXT:    s_cbranch_execz .LBB15_8
2256; GFX9-SDAG-NEXT:  ; %bb.5: ; %bb.0
2257; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
2258; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2259; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
2260; GFX9-SDAG-NEXT:    s_mov_b64 s[6:7], exec
2261; GFX9-SDAG-NEXT:    s_mov_b32 s8, 0
2262; GFX9-SDAG-NEXT:  .LBB15_6: ; =>This Inner Loop Header: Depth=1
2263; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s9, s[6:7]
2264; GFX9-SDAG-NEXT:    v_readlane_b32 s10, v0, s9
2265; GFX9-SDAG-NEXT:    s_bitset0_b64 s[6:7], s9
2266; GFX9-SDAG-NEXT:    s_max_u32 s8, s8, s10
2267; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[6:7], 0
2268; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB15_6
2269; GFX9-SDAG-NEXT:  ; %bb.7:
2270; GFX9-SDAG-NEXT:    s_mov_b32 s6, s32
2271; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s6
2272; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s8, 6, v0
2273; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
2274; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 1
2275; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s6
2276; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2277; GFX9-SDAG-NEXT:  .LBB15_8: ; %bb.2
2278; GFX9-SDAG-NEXT:    s_or_b64 exec, exec, s[4:5]
2279; GFX9-SDAG-NEXT:    s_mov_b32 s32, s34
2280; GFX9-SDAG-NEXT:    s_mov_b32 s34, s12
2281; GFX9-SDAG-NEXT:    s_mov_b32 s33, s11
2282; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2283; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
2284;
2285; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow:
2286; GFX9-GISEL:       ; %bb.0: ; %entry
2287; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2288; GFX9-GISEL-NEXT:    s_mov_b32 s11, s33
2289; GFX9-GISEL-NEXT:    s_add_i32 s33, s32, 0xfc0
2290; GFX9-GISEL-NEXT:    s_mov_b32 s12, s34
2291; GFX9-GISEL-NEXT:    s_mov_b32 s8, 0
2292; GFX9-GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
2293; GFX9-GISEL-NEXT:    s_and_b32 s33, s33, 0xfffff000
2294; GFX9-GISEL-NEXT:    s_mov_b32 s34, s32
2295; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x2000
2296; GFX9-GISEL-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2297; GFX9-GISEL-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
2298; GFX9-GISEL-NEXT:    s_cbranch_execz .LBB15_4
2299; GFX9-GISEL-NEXT:  ; %bb.1: ; %bb.1
2300; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v1, 2, 15
2301; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2302; GFX9-GISEL-NEXT:    s_mov_b64 s[6:7], exec
2303; GFX9-GISEL-NEXT:  .LBB15_2: ; =>This Inner Loop Header: Depth=1
2304; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s9, s[6:7]
2305; GFX9-GISEL-NEXT:    v_readlane_b32 s10, v0, s9
2306; GFX9-GISEL-NEXT:    s_bitset0_b64 s[6:7], s9
2307; GFX9-GISEL-NEXT:    s_max_u32 s8, s8, s10
2308; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
2309; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB15_2
2310; GFX9-GISEL-NEXT:  ; %bb.3:
2311; GFX9-GISEL-NEXT:    s_add_u32 s7, s32, 0xfff
2312; GFX9-GISEL-NEXT:    s_and_b32 s7, s7, 0xfffff000
2313; GFX9-GISEL-NEXT:    s_lshl_b32 s6, s8, 6
2314; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 2
2315; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s7
2316; GFX9-GISEL-NEXT:    s_add_u32 s32, s7, s6
2317; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
2318; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2319; GFX9-GISEL-NEXT:    ; implicit-def: $vgpr31
2320; GFX9-GISEL-NEXT:  .LBB15_4: ; %Flow
2321; GFX9-GISEL-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
2322; GFX9-GISEL-NEXT:    s_cbranch_execz .LBB15_8
2323; GFX9-GISEL-NEXT:  ; %bb.5: ; %bb.0
2324; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
2325; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2326; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2327; GFX9-GISEL-NEXT:    s_mov_b64 s[6:7], exec
2328; GFX9-GISEL-NEXT:    s_mov_b32 s8, 0
2329; GFX9-GISEL-NEXT:  .LBB15_6: ; =>This Inner Loop Header: Depth=1
2330; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s9, s[6:7]
2331; GFX9-GISEL-NEXT:    v_readlane_b32 s10, v0, s9
2332; GFX9-GISEL-NEXT:    s_bitset0_b64 s[6:7], s9
2333; GFX9-GISEL-NEXT:    s_max_u32 s8, s8, s10
2334; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[6:7], 0
2335; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB15_6
2336; GFX9-GISEL-NEXT:  ; %bb.7:
2337; GFX9-GISEL-NEXT:    s_mov_b32 s6, s32
2338; GFX9-GISEL-NEXT:    s_lshl_b32 s7, s8, 6
2339; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 1
2340; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s6
2341; GFX9-GISEL-NEXT:    s_add_u32 s32, s6, s7
2342; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
2343; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2344; GFX9-GISEL-NEXT:  .LBB15_8: ; %bb.2
2345; GFX9-GISEL-NEXT:    s_or_b64 exec, exec, s[4:5]
2346; GFX9-GISEL-NEXT:    s_mov_b32 s32, s34
2347; GFX9-GISEL-NEXT:    s_mov_b32 s34, s12
2348; GFX9-GISEL-NEXT:    s_mov_b32 s33, s11
2349; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2350; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
2351;
2352; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_control_flow:
2353; GFX11-SDAG:       ; %bb.0: ; %entry
2354; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2355; GFX11-SDAG-NEXT:    s_mov_b32 s5, s33
2356; GFX11-SDAG-NEXT:    s_add_i32 s33, s32, 63
2357; GFX11-SDAG-NEXT:    s_mov_b32 s6, s34
2358; GFX11-SDAG-NEXT:    s_mov_b32 s1, 0
2359; GFX11-SDAG-NEXT:    s_mov_b32 s0, exec_lo
2360; GFX11-SDAG-NEXT:    s_and_not1_b32 s33, s33, 63
2361; GFX11-SDAG-NEXT:    s_mov_b32 s34, s32
2362; GFX11-SDAG-NEXT:    s_addk_i32 s32, 0x80
2363; GFX11-SDAG-NEXT:    v_cmpx_ne_u32_e32 0, v0
2364; GFX11-SDAG-NEXT:    s_xor_b32 s0, exec_lo, s0
2365; GFX11-SDAG-NEXT:    s_cbranch_execz .LBB15_4
2366; GFX11-SDAG-NEXT:  ; %bb.1: ; %bb.1
2367; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, v1, 2, 15
2368; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 2
2369; GFX11-SDAG-NEXT:    s_mov_b32 s2, exec_lo
2370; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2371; GFX11-SDAG-NEXT:    v_and_b32_e32 v1, -16, v1
2372; GFX11-SDAG-NEXT:  .LBB15_2: ; =>This Inner Loop Header: Depth=1
2373; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s3, s2
2374; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2375; GFX11-SDAG-NEXT:    v_readlane_b32 s4, v1, s3
2376; GFX11-SDAG-NEXT:    s_bitset0_b32 s2, s3
2377; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2378; GFX11-SDAG-NEXT:    s_max_u32 s1, s1, s4
2379; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
2380; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB15_2
2381; GFX11-SDAG-NEXT:  ; %bb.3:
2382; GFX11-SDAG-NEXT:    s_add_i32 s2, s32, 0x7ff
2383; GFX11-SDAG-NEXT:    ; implicit-def: $vgpr31
2384; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
2385; GFX11-SDAG-NEXT:    s_and_b32 s2, s2, 0xfffff800
2386; GFX11-SDAG-NEXT:    v_lshl_add_u32 v1, s1, 5, s2
2387; GFX11-SDAG-NEXT:    scratch_store_b32 off, v0, s2 dlc
2388; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2389; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v1
2390; GFX11-SDAG-NEXT:  .LBB15_4: ; %Flow
2391; GFX11-SDAG-NEXT:    s_and_not1_saveexec_b32 s0, s0
2392; GFX11-SDAG-NEXT:    s_cbranch_execz .LBB15_8
2393; GFX11-SDAG-NEXT:  ; %bb.5: ; %bb.0
2394; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
2395; GFX11-SDAG-NEXT:    s_mov_b32 s2, exec_lo
2396; GFX11-SDAG-NEXT:    s_mov_b32 s1, 0
2397; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2398; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2399; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x1ff0, v0
2400; GFX11-SDAG-NEXT:  .LBB15_6: ; =>This Inner Loop Header: Depth=1
2401; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s3, s2
2402; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2403; GFX11-SDAG-NEXT:    v_readlane_b32 s4, v0, s3
2404; GFX11-SDAG-NEXT:    s_bitset0_b32 s2, s3
2405; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2406; GFX11-SDAG-NEXT:    s_max_u32 s1, s1, s4
2407; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s2, 0
2408; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB15_6
2409; GFX11-SDAG-NEXT:  ; %bb.7:
2410; GFX11-SDAG-NEXT:    s_mov_b32 s2, s32
2411; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 1
2412; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s1, 5, s2
2413; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s2 dlc
2414; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2415; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
2416; GFX11-SDAG-NEXT:  .LBB15_8: ; %bb.2
2417; GFX11-SDAG-NEXT:    s_or_b32 exec_lo, exec_lo, s0
2418; GFX11-SDAG-NEXT:    s_mov_b32 s32, s34
2419; GFX11-SDAG-NEXT:    s_mov_b32 s34, s6
2420; GFX11-SDAG-NEXT:    s_mov_b32 s33, s5
2421; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
2422;
2423; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_control_flow:
2424; GFX11-GISEL:       ; %bb.0: ; %entry
2425; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2426; GFX11-GISEL-NEXT:    s_mov_b32 s5, s33
2427; GFX11-GISEL-NEXT:    s_add_i32 s33, s32, 63
2428; GFX11-GISEL-NEXT:    s_mov_b32 s6, s34
2429; GFX11-GISEL-NEXT:    s_mov_b32 s1, 0
2430; GFX11-GISEL-NEXT:    s_mov_b32 s0, exec_lo
2431; GFX11-GISEL-NEXT:    s_and_not1_b32 s33, s33, 63
2432; GFX11-GISEL-NEXT:    s_mov_b32 s34, s32
2433; GFX11-GISEL-NEXT:    s_addk_i32 s32, 0x80
2434; GFX11-GISEL-NEXT:    v_cmpx_ne_u32_e32 0, v0
2435; GFX11-GISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
2436; GFX11-GISEL-NEXT:    s_cbranch_execz .LBB15_4
2437; GFX11-GISEL-NEXT:  ; %bb.1: ; %bb.1
2438; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v1, 2, 15
2439; GFX11-GISEL-NEXT:    s_mov_b32 s2, exec_lo
2440; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2441; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2442; GFX11-GISEL-NEXT:  .LBB15_2: ; =>This Inner Loop Header: Depth=1
2443; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s3, s2
2444; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2445; GFX11-GISEL-NEXT:    v_readlane_b32 s4, v0, s3
2446; GFX11-GISEL-NEXT:    s_bitset0_b32 s2, s3
2447; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2448; GFX11-GISEL-NEXT:    s_max_u32 s1, s1, s4
2449; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
2450; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB15_2
2451; GFX11-GISEL-NEXT:  ; %bb.3:
2452; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 2
2453; GFX11-GISEL-NEXT:    s_add_u32 s2, s32, 0x7ff
2454; GFX11-GISEL-NEXT:    s_lshl_b32 s1, s1, 5
2455; GFX11-GISEL-NEXT:    s_and_b32 s2, s2, 0xfffff800
2456; GFX11-GISEL-NEXT:    ; implicit-def: $vgpr31
2457; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2458; GFX11-GISEL-NEXT:    s_add_u32 s32, s2, s1
2459; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s2 dlc
2460; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2461; GFX11-GISEL-NEXT:  .LBB15_4: ; %Flow
2462; GFX11-GISEL-NEXT:    s_and_not1_saveexec_b32 s0, s0
2463; GFX11-GISEL-NEXT:    s_cbranch_execz .LBB15_8
2464; GFX11-GISEL-NEXT:  ; %bb.5: ; %bb.0
2465; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
2466; GFX11-GISEL-NEXT:    s_mov_b32 s2, exec_lo
2467; GFX11-GISEL-NEXT:    s_mov_b32 s1, 0
2468; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2469; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2470; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2471; GFX11-GISEL-NEXT:  .LBB15_6: ; =>This Inner Loop Header: Depth=1
2472; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s3, s2
2473; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2474; GFX11-GISEL-NEXT:    v_readlane_b32 s4, v0, s3
2475; GFX11-GISEL-NEXT:    s_bitset0_b32 s2, s3
2476; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2477; GFX11-GISEL-NEXT:    s_max_u32 s1, s1, s4
2478; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
2479; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB15_6
2480; GFX11-GISEL-NEXT:  ; %bb.7:
2481; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 1
2482; GFX11-GISEL-NEXT:    s_mov_b32 s2, s32
2483; GFX11-GISEL-NEXT:    s_lshl_b32 s1, s1, 5
2484; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2485; GFX11-GISEL-NEXT:    s_add_u32 s32, s2, s1
2486; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s2 dlc
2487; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2488; GFX11-GISEL-NEXT:  .LBB15_8: ; %bb.2
2489; GFX11-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
2490; GFX11-GISEL-NEXT:    s_mov_b32 s32, s34
2491; GFX11-GISEL-NEXT:    s_mov_b32 s34, s6
2492; GFX11-GISEL-NEXT:    s_mov_b32 s33, s5
2493; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
2494entry:
  ; Select the path: %n == 0 goes to bb.0, any other value goes to bb.1.
2495  %cond = icmp eq i32 %n, 0
2496  br i1 %cond, label %bb.0, label %bb.1
2497bb.0:
  ; Element count comes from the workitem id; the alloca asks for align 4.
2498  %idx = call i32 @llvm.amdgcn.workitem.id.x()
2499  %alloca1 = alloca i32, i32 %idx, align 4, addrspace(5)
  ; The volatile store of 1 is what the expected output writes at the old s32.
2500  store volatile i32 1, ptr addrspace(5) %alloca1
2501  br label %bb.2
2502bb.1:
  ; Element count is the %m argument; the alloca asks for align 64. The expected
  ; output rounds s32 up first (0xfff / 0xfffff000 on gfx900, 0x7ff / 0xfffff800
  ; on gfx1100) and stores 2 at the rounded address.
2503  %alloca2 = alloca i32, i32 %m, align 64, addrspace(5)
2504  store volatile i32 2, ptr addrspace(5) %alloca2
2505  br label %bb.2
2506bb.2:
  ; Join point for both paths; no alloca is referenced here.
2507  ret void
2508}
2509
; Non-kernel function whose single dynamic alloca takes its element count as an
; i16 argument instead of an i32.
; For all four RUN configurations the expected output masks the count with
; 0xffff, scales it (shift left by 2, add 15, then mask), reduces it with a
; readlane/s_max_u32 loop over the exec mask, and reloads s32 from s33 before
; returning.
2510define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i16(i16 %n) {
2511; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
2512; GFX9-SDAG:       ; %bb.0:
2513; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2514; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2515; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2516; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
2517; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, 0x7fff0, v0
2518; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
2519; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
2520; GFX9-SDAG-NEXT:    s_mov_b32 s33, s32
2521; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x400
2522; GFX9-SDAG-NEXT:  .LBB16_1: ; =>This Inner Loop Header: Depth=1
2523; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
2524; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
2525; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
2526; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
2527; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
2528; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB16_1
2529; GFX9-SDAG-NEXT:  ; %bb.2:
2530; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
2531; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
2532; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
2533; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
2534; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x29a
2535; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
2536; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2537; GFX9-SDAG-NEXT:    s_mov_b32 s32, s33
2538; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
2539; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
2540;
2541; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
2542; GFX9-GISEL:       ; %bb.0:
2543; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2544; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2545; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2546; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
2547; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2548; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
2549; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
2550; GFX9-GISEL-NEXT:    s_mov_b32 s33, s32
2551; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x400
2552; GFX9-GISEL-NEXT:  .LBB16_1: ; =>This Inner Loop Header: Depth=1
2553; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
2554; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
2555; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
2556; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
2557; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
2558; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB16_1
2559; GFX9-GISEL-NEXT:  ; %bb.2:
2560; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
2561; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
2562; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
2563; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
2564; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
2565; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
2566; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2567; GFX9-GISEL-NEXT:    s_mov_b32 s32, s33
2568; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
2569; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
2570;
2571; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
2572; GFX11-SDAG:       ; %bb.0:
2573; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2574; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2575; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
2576; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
2577; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
2578; GFX11-SDAG-NEXT:    s_mov_b32 s33, s32
2579; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2580; GFX11-SDAG-NEXT:    s_add_i32 s32, s32, 16
2581; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2582; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x7fff0, v0
2583; GFX11-SDAG-NEXT:  .LBB16_1: ; =>This Inner Loop Header: Depth=1
2584; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
2585; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2586; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
2587; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
2588; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2589; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
2590; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
2591; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB16_1
2592; GFX11-SDAG-NEXT:  ; %bb.2:
2593; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
2594; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x29a
2595; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
2596; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
2597; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2598; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
2599; GFX11-SDAG-NEXT:    s_mov_b32 s32, s33
2600; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
2601; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
2602;
2603; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i16:
2604; GFX11-GISEL:       ; %bb.0:
2605; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2606; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
2607; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
2608; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
2609; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
2610; GFX11-GISEL-NEXT:    s_mov_b32 s33, s32
2611; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2612; GFX11-GISEL-NEXT:    s_add_i32 s32, s32, 16
2613; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2614; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2615; GFX11-GISEL-NEXT:  .LBB16_1: ; =>This Inner Loop Header: Depth=1
2616; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
2617; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2618; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
2619; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
2620; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2621; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
2622; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
2623; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB16_1
2624; GFX11-GISEL-NEXT:  ; %bb.2:
2625; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
2626; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
2627; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
2628; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2629; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
2630; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
2631; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2632; GFX11-GISEL-NEXT:    s_mov_b32 s32, s33
2633; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
2634; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  ; i32 elements counted by the i16 argument; the alloca asks for align 2.
2635  %alloca = alloca i32, i16 %n, align 2, addrspace(5)
  ; 666 is the 0x29a immediate that the expected output stores at the old s32.
2636  store volatile i32 666, ptr addrspace(5) %alloca
2637  ret void
2638}
2639
; Non-kernel function (plain `define void`, no amdgpu_kernel) exercising a
; dynamically sized alloca whose element count is an i64 %n rather than an i32.
; The alloca lives in addrspace(5) with align 2, and a volatile store of 666
; (0x29a in the checks below) is made through the resulting pointer.
;
; What the expected output shows for all four RUN configurations:
;   * only v0 feeds the size computation: (v0 << 2) + 15, masked with -16;
;   * a loop over a copy of the exec mask (find lowest set bit, v_readlane,
;     clear that bit, s_max_u32) reduces the per-lane size to one scalar
;     maximum;
;   * that maximum is shifted left (by 6 on gfx900, by 5 on gfx1100) and added
;     to s32, and s32/s33 are restored from their saved copies before return.
; NOTE(review): presumably v0 is the low 32 bits of %n and the high half is
; simply dropped, and the 6/5 shift reflects the wavefront size - confirm
; against the AMDGPU calling convention and stack lowering.
2640define void @test_dynamic_stackalloc_device_divergent_non_standard_size_i64(i64 %n) {
2641; GFX9-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
2642; GFX9-SDAG:       ; %bb.0:
2643; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2644; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2645; GFX9-SDAG-NEXT:    s_mov_b32 s9, s33
2646; GFX9-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
2647; GFX9-SDAG-NEXT:    s_mov_b64 s[4:5], exec
2648; GFX9-SDAG-NEXT:    s_mov_b32 s6, 0
2649; GFX9-SDAG-NEXT:    s_mov_b32 s33, s32
2650; GFX9-SDAG-NEXT:    s_addk_i32 s32, 0x400
2651; GFX9-SDAG-NEXT:  .LBB17_1: ; =>This Inner Loop Header: Depth=1
2652; GFX9-SDAG-NEXT:    s_ff1_i32_b64 s7, s[4:5]
2653; GFX9-SDAG-NEXT:    v_readlane_b32 s8, v0, s7
2654; GFX9-SDAG-NEXT:    s_bitset0_b64 s[4:5], s7
2655; GFX9-SDAG-NEXT:    s_max_u32 s6, s6, s8
2656; GFX9-SDAG-NEXT:    s_cmp_lg_u64 s[4:5], 0
2657; GFX9-SDAG-NEXT:    s_cbranch_scc1 .LBB17_1
2658; GFX9-SDAG-NEXT:  ; %bb.2:
2659; GFX9-SDAG-NEXT:    s_mov_b32 s4, s32
2660; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, s4
2661; GFX9-SDAG-NEXT:    v_lshl_add_u32 v0, s6, 6, v0
2662; GFX9-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
2663; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x29a
2664; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[0:3], s4
2665; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
2666; GFX9-SDAG-NEXT:    s_mov_b32 s32, s33
2667; GFX9-SDAG-NEXT:    s_mov_b32 s33, s9
2668; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
2669;
2670; GFX9-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
2671; GFX9-GISEL:       ; %bb.0:
2672; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2673; GFX9-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2674; GFX9-GISEL-NEXT:    s_mov_b32 s9, s33
2675; GFX9-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2676; GFX9-GISEL-NEXT:    s_mov_b64 s[4:5], exec
2677; GFX9-GISEL-NEXT:    s_mov_b32 s6, 0
2678; GFX9-GISEL-NEXT:    s_mov_b32 s33, s32
2679; GFX9-GISEL-NEXT:    s_addk_i32 s32, 0x400
2680; GFX9-GISEL-NEXT:  .LBB17_1: ; =>This Inner Loop Header: Depth=1
2681; GFX9-GISEL-NEXT:    s_ff1_i32_b64 s7, s[4:5]
2682; GFX9-GISEL-NEXT:    v_readlane_b32 s8, v0, s7
2683; GFX9-GISEL-NEXT:    s_bitset0_b64 s[4:5], s7
2684; GFX9-GISEL-NEXT:    s_max_u32 s6, s6, s8
2685; GFX9-GISEL-NEXT:    s_cmp_lg_u64 s[4:5], 0
2686; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB17_1
2687; GFX9-GISEL-NEXT:  ; %bb.2:
2688; GFX9-GISEL-NEXT:    s_mov_b32 s4, s32
2689; GFX9-GISEL-NEXT:    s_lshl_b32 s5, s6, 6
2690; GFX9-GISEL-NEXT:    s_add_u32 s32, s4, s5
2691; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
2692; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, s4
2693; GFX9-GISEL-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
2694; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
2695; GFX9-GISEL-NEXT:    s_mov_b32 s32, s33
2696; GFX9-GISEL-NEXT:    s_mov_b32 s33, s9
2697; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
2698;
2699; GFX11-SDAG-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
2700; GFX11-SDAG:       ; %bb.0:
2701; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2702; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2703; GFX11-SDAG-NEXT:    s_mov_b32 s4, s33
2704; GFX11-SDAG-NEXT:    s_mov_b32 s1, exec_lo
2705; GFX11-SDAG-NEXT:    s_mov_b32 s0, 0
2706; GFX11-SDAG-NEXT:    s_mov_b32 s33, s32
2707; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, -16, v0
2708; GFX11-SDAG-NEXT:    s_add_i32 s32, s32, 16
2709; GFX11-SDAG-NEXT:  .LBB17_1: ; =>This Inner Loop Header: Depth=1
2710; GFX11-SDAG-NEXT:    s_ctz_i32_b32 s2, s1
2711; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2712; GFX11-SDAG-NEXT:    v_readlane_b32 s3, v0, s2
2713; GFX11-SDAG-NEXT:    s_bitset0_b32 s1, s2
2714; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2715; GFX11-SDAG-NEXT:    s_max_u32 s0, s0, s3
2716; GFX11-SDAG-NEXT:    s_cmp_lg_u32 s1, 0
2717; GFX11-SDAG-NEXT:    s_cbranch_scc1 .LBB17_1
2718; GFX11-SDAG-NEXT:  ; %bb.2:
2719; GFX11-SDAG-NEXT:    s_mov_b32 s1, s32
2720; GFX11-SDAG-NEXT:    v_mov_b32_e32 v1, 0x29a
2721; GFX11-SDAG-NEXT:    v_lshl_add_u32 v0, s0, 5, s1
2722; GFX11-SDAG-NEXT:    scratch_store_b32 off, v1, s1 dlc
2723; GFX11-SDAG-NEXT:    s_waitcnt_vscnt null, 0x0
2724; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s32, v0
2725; GFX11-SDAG-NEXT:    s_mov_b32 s32, s33
2726; GFX11-SDAG-NEXT:    s_mov_b32 s33, s4
2727; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
2728;
2729; GFX11-GISEL-LABEL: test_dynamic_stackalloc_device_divergent_non_standard_size_i64:
2730; GFX11-GISEL:       ; %bb.0:
2731; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2732; GFX11-GISEL-NEXT:    v_lshl_add_u32 v0, v0, 2, 15
2733; GFX11-GISEL-NEXT:    s_mov_b32 s4, s33
2734; GFX11-GISEL-NEXT:    s_mov_b32 s1, exec_lo
2735; GFX11-GISEL-NEXT:    s_mov_b32 s0, 0
2736; GFX11-GISEL-NEXT:    s_mov_b32 s33, s32
2737; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, -16, v0
2738; GFX11-GISEL-NEXT:    s_add_i32 s32, s32, 16
2739; GFX11-GISEL-NEXT:  .LBB17_1: ; =>This Inner Loop Header: Depth=1
2740; GFX11-GISEL-NEXT:    s_ctz_i32_b32 s2, s1
2741; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
2742; GFX11-GISEL-NEXT:    v_readlane_b32 s3, v0, s2
2743; GFX11-GISEL-NEXT:    s_bitset0_b32 s1, s2
2744; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2745; GFX11-GISEL-NEXT:    s_max_u32 s0, s0, s3
2746; GFX11-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
2747; GFX11-GISEL-NEXT:    s_cbranch_scc1 .LBB17_1
2748; GFX11-GISEL-NEXT:  ; %bb.2:
2749; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x29a
2750; GFX11-GISEL-NEXT:    s_mov_b32 s1, s32
2751; GFX11-GISEL-NEXT:    s_lshl_b32 s0, s0, 5
2752; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
2753; GFX11-GISEL-NEXT:    s_add_u32 s32, s1, s0
2754; GFX11-GISEL-NEXT:    scratch_store_b32 off, v0, s1 dlc
2755; GFX11-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
2756; GFX11-GISEL-NEXT:    s_mov_b32 s32, s33
2757; GFX11-GISEL-NEXT:    s_mov_b32 s33, s4
2758; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
; IR under test: %n i32 elements are allocated, then one volatile i32 store
; writes 666 through the returned addrspace(5) pointer.
2759  %alloca = alloca i32, i64 %n, align 2, addrspace(5)
2760  store volatile i32 666, ptr addrspace(5) %alloca
2761  ret void
2762}
2763