xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX8DAGISEL %s
3; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX8GISEL %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX9DAGISEL %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX9GISEL %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1064DAGISEL %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s
8; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s
9; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s
10; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1164DAGISEL %s
11; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1164GISEL %s
12; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1132DAGISEL %s
13; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1132GISEL %s
14
15
; Intrinsics used by the kernels in this file.
; wave.reduce.umin takes the i32 value to reduce plus a second i32 operand that
; is marked immarg, so it must be a compile-time constant; every call in this
; file passes 1. The meaning of that operand is not shown here - TODO confirm
; against the intrinsic's definition.
; workitem.id.x supplies the i32 input used by the divergent tests.
16declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
17declare i32 @llvm.amdgcn.workitem.id.x()
18
; uniform_value: reduce the kernel argument %in and store the result to %out.
; For every target the expected code below only loads %in (offset 0x2c) and
; %out (offset 0x24) with scalar loads, copies the loaded %in into a VGPR and
; stores it; no reduction loop is expected for this input.
19define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
20; GFX8DAGISEL-LABEL: uniform_value:
21; GFX8DAGISEL:       ; %bb.0: ; %entry
22; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
23; GFX8DAGISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
24; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
25; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
26; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
27; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
28; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
29; GFX8DAGISEL-NEXT:    s_endpgm
30;
31; GFX8GISEL-LABEL: uniform_value:
32; GFX8GISEL:       ; %bb.0: ; %entry
33; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
34; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
35; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
36; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
37; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
38; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
39; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
40; GFX8GISEL-NEXT:    s_endpgm
41;
42; GFX9DAGISEL-LABEL: uniform_value:
43; GFX9DAGISEL:       ; %bb.0: ; %entry
44; GFX9DAGISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
45; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
46; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
47; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
48; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
49; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
50; GFX9DAGISEL-NEXT:    s_endpgm
51;
52; GFX9GISEL-LABEL: uniform_value:
53; GFX9GISEL:       ; %bb.0: ; %entry
54; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
55; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
56; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
57; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
58; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
59; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
60; GFX9GISEL-NEXT:    s_endpgm
61;
62; GFX10DAGISEL-LABEL: uniform_value:
63; GFX10DAGISEL:       ; %bb.0: ; %entry
64; GFX10DAGISEL-NEXT:    s_clause 0x1
65; GFX10DAGISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
66; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
67; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
68; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
69; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
70; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
71; GFX10DAGISEL-NEXT:    s_endpgm
72;
73; GFX10GISEL-LABEL: uniform_value:
74; GFX10GISEL:       ; %bb.0: ; %entry
75; GFX10GISEL-NEXT:    s_clause 0x1
76; GFX10GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
77; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
78; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
79; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
80; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, s2
81; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
82; GFX10GISEL-NEXT:    s_endpgm
83;
84; GFX1164DAGISEL-LABEL: uniform_value:
85; GFX1164DAGISEL:       ; %bb.0: ; %entry
86; GFX1164DAGISEL-NEXT:    s_clause 0x1
87; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
88; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
89; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
90; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
91; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
92; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
93; GFX1164DAGISEL-NEXT:    s_endpgm
94;
95; GFX1164GISEL-LABEL: uniform_value:
96; GFX1164GISEL:       ; %bb.0: ; %entry
97; GFX1164GISEL-NEXT:    s_clause 0x1
98; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
99; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
100; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
101; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
102; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
103; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
104; GFX1164GISEL-NEXT:    s_endpgm
105;
106; GFX1132DAGISEL-LABEL: uniform_value:
107; GFX1132DAGISEL:       ; %bb.0: ; %entry
108; GFX1132DAGISEL-NEXT:    s_clause 0x1
109; GFX1132DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
110; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
111; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
112; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
113; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
114; GFX1132DAGISEL-NEXT:    s_endpgm
115;
116; GFX1132GISEL-LABEL: uniform_value:
117; GFX1132GISEL:       ; %bb.0: ; %entry
118; GFX1132GISEL-NEXT:    s_clause 0x1
119; GFX1132GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
120; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
121; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
122; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
123; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
124; GFX1132GISEL-NEXT:    s_endpgm
125entry:
126    %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %in, i32 1)
127    store i32 %result, ptr addrspace(1) %out
128    ret void
129}
130
; const_value: reduce the literal 123 and store the result to %out.
; Every expected output below materialises the immediate 0x7b (= 123) in a
; VGPR and stores it; no reduction loop is expected for a constant input.
131define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
132; GFX8DAGISEL-LABEL: const_value:
133; GFX8DAGISEL:       ; %bb.0: ; %entry
134; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
135; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
136; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
137; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
138; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
139; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
140; GFX8DAGISEL-NEXT:    s_endpgm
141;
142; GFX8GISEL-LABEL: const_value:
143; GFX8GISEL:       ; %bb.0: ; %entry
144; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
145; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
146; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
147; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
148; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
149; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
150; GFX8GISEL-NEXT:    s_endpgm
151;
152; GFX9DAGISEL-LABEL: const_value:
153; GFX9DAGISEL:       ; %bb.0: ; %entry
154; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
155; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
156; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
157; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
158; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
159; GFX9DAGISEL-NEXT:    s_endpgm
160;
161; GFX9GISEL-LABEL: const_value:
162; GFX9GISEL:       ; %bb.0: ; %entry
163; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
164; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
165; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
166; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
167; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
168; GFX9GISEL-NEXT:    s_endpgm
169;
170; GFX10DAGISEL-LABEL: const_value:
171; GFX10DAGISEL:       ; %bb.0: ; %entry
172; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
173; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
174; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
175; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
176; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
177; GFX10DAGISEL-NEXT:    s_endpgm
178;
179; GFX10GISEL-LABEL: const_value:
180; GFX10GISEL:       ; %bb.0: ; %entry
181; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
182; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
183; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
184; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
185; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
186; GFX10GISEL-NEXT:    s_endpgm
187;
188; GFX1164DAGISEL-LABEL: const_value:
189; GFX1164DAGISEL:       ; %bb.0: ; %entry
190; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
191; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
192; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
193; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
194; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
195; GFX1164DAGISEL-NEXT:    s_endpgm
196;
197; GFX1164GISEL-LABEL: const_value:
198; GFX1164GISEL:       ; %bb.0: ; %entry
199; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
200; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
201; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
202; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
203; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
204; GFX1164GISEL-NEXT:    s_endpgm
205;
206; GFX1132DAGISEL-LABEL: const_value:
207; GFX1132DAGISEL:       ; %bb.0: ; %entry
208; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
209; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
210; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
211; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
212; GFX1132DAGISEL-NEXT:    s_endpgm
213;
214; GFX1132GISEL-LABEL: const_value:
215; GFX1132GISEL:       ; %bb.0: ; %entry
216; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
217; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
218; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
219; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
220; GFX1132GISEL-NEXT:    s_endpgm
221entry:
222    %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1)
223    store i32 %result, ptr addrspace(1) %out
224    ret void
225}
226
; poison_value: reduce a poison input and store the result to %out.
; For every target the expected kernel body is a lone s_endpgm: neither a
; reduction loop nor the store appears in the generated code.
227define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) {
228; GFX8DAGISEL-LABEL: poison_value:
229; GFX8DAGISEL:       ; %bb.0: ; %entry
230; GFX8DAGISEL-NEXT:    s_endpgm
231;
232; GFX8GISEL-LABEL: poison_value:
233; GFX8GISEL:       ; %bb.0: ; %entry
234; GFX8GISEL-NEXT:    s_endpgm
235;
236; GFX9DAGISEL-LABEL: poison_value:
237; GFX9DAGISEL:       ; %bb.0: ; %entry
238; GFX9DAGISEL-NEXT:    s_endpgm
239;
240; GFX9GISEL-LABEL: poison_value:
241; GFX9GISEL:       ; %bb.0: ; %entry
242; GFX9GISEL-NEXT:    s_endpgm
243;
244; GFX10DAGISEL-LABEL: poison_value:
245; GFX10DAGISEL:       ; %bb.0: ; %entry
246; GFX10DAGISEL-NEXT:    s_endpgm
247;
248; GFX10GISEL-LABEL: poison_value:
249; GFX10GISEL:       ; %bb.0: ; %entry
250; GFX10GISEL-NEXT:    s_endpgm
251;
252; GFX11DAGISEL-LABEL: poison_value:
253; GFX11DAGISEL:       ; %bb.0: ; %entry
254; GFX11DAGISEL-NEXT:    s_endpgm
255;
256; GFX11GISEL-LABEL: poison_value:
257; GFX11GISEL:       ; %bb.0: ; %entry
258; GFX11GISEL-NEXT:    s_endpgm
259entry:
260    %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
261    store i32 %result, ptr addrspace(1) %out
262    ret void
263}
264
; divergent_value: reduce the result of workitem.id.x and store it to %out.
; Here the expected code contains an explicit loop over lanes:
;   - exec is copied into an SGPR mask and the accumulator is seeded with -1
;     (all ones, the largest unsigned 32-bit value);
;   - each iteration picks a set bit of the mask (s_ff1 on gfx8/9/10, s_ctz on
;     gfx11), reads v0 from that lane with v_readlane, clears the bit with
;     s_bitset0 and folds the lane value into the accumulator with s_min_u32;
;   - the loop repeats while the mask is non-zero, then the accumulator is
;     moved to a VGPR and stored.
; Outputs for the +wavefrontsize64 runs and for gfx8/gfx9 use 64-bit mask
; operations on exec; the other gfx10/gfx11 outputs use exec_lo and 32-bit
; operations. The gfx11 outputs additionally mask v0 with 0x3ff before the loop.
265define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
266; GFX8DAGISEL-LABEL: divergent_value:
267; GFX8DAGISEL:       ; %bb.0: ; %entry
268; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
269; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
270; GFX8DAGISEL-NEXT:    s_mov_b32 s4, -1
271; GFX8DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
272; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
273; GFX8DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
274; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
275; GFX8DAGISEL-NEXT:    s_min_u32 s4, s4, s6
276; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
277; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
278; GFX8DAGISEL-NEXT:  ; %bb.2:
279; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
280; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
281; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
282; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s4
283; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
284; GFX8DAGISEL-NEXT:    s_endpgm
285;
286; GFX8GISEL-LABEL: divergent_value:
287; GFX8GISEL:       ; %bb.0: ; %entry
288; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
289; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
290; GFX8GISEL-NEXT:    s_mov_b32 s4, -1
291; GFX8GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
292; GFX8GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
293; GFX8GISEL-NEXT:    v_readlane_b32 s6, v0, s5
294; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
295; GFX8GISEL-NEXT:    s_min_u32 s4, s4, s6
296; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
297; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
298; GFX8GISEL-NEXT:  ; %bb.2:
299; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
300; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
301; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s4
302; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
303; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
304; GFX8GISEL-NEXT:    s_endpgm
305;
306; GFX9DAGISEL-LABEL: divergent_value:
307; GFX9DAGISEL:       ; %bb.0: ; %entry
308; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
309; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
310; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
311; GFX9DAGISEL-NEXT:    s_mov_b32 s4, -1
312; GFX9DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
313; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
314; GFX9DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
315; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
316; GFX9DAGISEL-NEXT:    s_min_u32 s4, s4, s6
317; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
318; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
319; GFX9DAGISEL-NEXT:  ; %bb.2:
320; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
321; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
322; GFX9DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
323; GFX9DAGISEL-NEXT:    s_endpgm
324;
325; GFX9GISEL-LABEL: divergent_value:
326; GFX9GISEL:       ; %bb.0: ; %entry
327; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
328; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
329; GFX9GISEL-NEXT:    s_mov_b32 s4, -1
330; GFX9GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
331; GFX9GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
332; GFX9GISEL-NEXT:    v_readlane_b32 s6, v0, s5
333; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
334; GFX9GISEL-NEXT:    s_min_u32 s4, s4, s6
335; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
336; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
337; GFX9GISEL-NEXT:  ; %bb.2:
338; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s4
339; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
340; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
341; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
342; GFX9GISEL-NEXT:    s_endpgm
343;
344; GFX1064DAGISEL-LABEL: divergent_value:
345; GFX1064DAGISEL:       ; %bb.0: ; %entry
346; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
347; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
348; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
349; GFX1064DAGISEL-NEXT:    s_mov_b32 s4, -1
350; GFX1064DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
351; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
352; GFX1064DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
353; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
354; GFX1064DAGISEL-NEXT:    s_min_u32 s4, s4, s6
355; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
356; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
357; GFX1064DAGISEL-NEXT:  ; %bb.2:
358; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
359; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
360; GFX1064DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
361; GFX1064DAGISEL-NEXT:    s_endpgm
362;
363; GFX1064GISEL-LABEL: divergent_value:
364; GFX1064GISEL:       ; %bb.0: ; %entry
365; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
366; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
367; GFX1064GISEL-NEXT:    s_mov_b32 s4, -1
368; GFX1064GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
369; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s5, s[2:3]
370; GFX1064GISEL-NEXT:    v_readlane_b32 s6, v0, s5
371; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
372; GFX1064GISEL-NEXT:    s_min_u32 s4, s4, s6
373; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
374; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
375; GFX1064GISEL-NEXT:  ; %bb.2:
376; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s4
377; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
378; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
379; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
380; GFX1064GISEL-NEXT:    s_endpgm
381;
382; GFX1032DAGISEL-LABEL: divergent_value:
383; GFX1032DAGISEL:       ; %bb.0: ; %entry
384; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
385; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
386; GFX1032DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
387; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, -1
388; GFX1032DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
389; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s4, s3
390; GFX1032DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
391; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s3, s4
392; GFX1032DAGISEL-NEXT:    s_min_u32 s2, s2, s5
393; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
394; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
395; GFX1032DAGISEL-NEXT:  ; %bb.2:
396; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
397; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
398; GFX1032DAGISEL-NEXT:    global_store_dword v1, v0, s[0:1]
399; GFX1032DAGISEL-NEXT:    s_endpgm
400;
401; GFX1032GISEL-LABEL: divergent_value:
402; GFX1032GISEL:       ; %bb.0: ; %entry
403; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
404; GFX1032GISEL-NEXT:    s_mov_b32 s3, exec_lo
405; GFX1032GISEL-NEXT:    s_mov_b32 s2, -1
406; GFX1032GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
407; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s4, s3
408; GFX1032GISEL-NEXT:    v_readlane_b32 s5, v0, s4
409; GFX1032GISEL-NEXT:    s_bitset0_b32 s3, s4
410; GFX1032GISEL-NEXT:    s_min_u32 s2, s2, s5
411; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s3, 0
412; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
413; GFX1032GISEL-NEXT:  ; %bb.2:
414; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
415; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
416; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
417; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
418; GFX1032GISEL-NEXT:    s_endpgm
419;
420; GFX1164DAGISEL-LABEL: divergent_value:
421; GFX1164DAGISEL:       ; %bb.0: ; %entry
422; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
423; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
424; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
425; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
426; GFX1164DAGISEL-NEXT:    s_mov_b32 s4, -1
427; GFX1164DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
428; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
429; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
430; GFX1164DAGISEL-NEXT:    v_readlane_b32 s6, v0, s5
431; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s5
432; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
433; GFX1164DAGISEL-NEXT:    s_min_u32 s4, s4, s6
434; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
435; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
436; GFX1164DAGISEL-NEXT:  ; %bb.2:
437; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s4
438; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
439; GFX1164DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
440; GFX1164DAGISEL-NEXT:    s_endpgm
441;
442; GFX1164GISEL-LABEL: divergent_value:
443; GFX1164GISEL:       ; %bb.0: ; %entry
444; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
445; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
446; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
447; GFX1164GISEL-NEXT:    s_mov_b32 s4, -1
448; GFX1164GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
449; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s5, s[2:3]
450; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
451; GFX1164GISEL-NEXT:    v_readlane_b32 s6, v0, s5
452; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s5
453; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
454; GFX1164GISEL-NEXT:    s_min_u32 s4, s4, s6
455; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
456; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
457; GFX1164GISEL-NEXT:  ; %bb.2:
458; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s4
459; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
460; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
461; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
462; GFX1164GISEL-NEXT:    s_endpgm
463;
464; GFX1132DAGISEL-LABEL: divergent_value:
465; GFX1132DAGISEL:       ; %bb.0: ; %entry
466; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
467; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
468; GFX1132DAGISEL-NEXT:    s_mov_b32 s3, exec_lo
469; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, -1
470; GFX1132DAGISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
471; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s4, s3
472; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
473; GFX1132DAGISEL-NEXT:    v_readlane_b32 s5, v0, s4
474; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s3, s4
475; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
476; GFX1132DAGISEL-NEXT:    s_min_u32 s2, s2, s5
477; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s3, 0
478; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB3_1
479; GFX1132DAGISEL-NEXT:  ; %bb.2:
480; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
481; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
482; GFX1132DAGISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
483; GFX1132DAGISEL-NEXT:    s_endpgm
484;
485; GFX1132GISEL-LABEL: divergent_value:
486; GFX1132GISEL:       ; %bb.0: ; %entry
487; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
488; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
489; GFX1132GISEL-NEXT:    s_mov_b32 s3, exec_lo
490; GFX1132GISEL-NEXT:    s_mov_b32 s2, -1
491; GFX1132GISEL-NEXT:  .LBB3_1: ; =>This Inner Loop Header: Depth=1
492; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s4, s3
493; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
494; GFX1132GISEL-NEXT:    v_readlane_b32 s5, v0, s4
495; GFX1132GISEL-NEXT:    s_bitset0_b32 s3, s4
496; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
497; GFX1132GISEL-NEXT:    s_min_u32 s2, s2, s5
498; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s3, 0
499; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB3_1
500; GFX1132GISEL-NEXT:  ; %bb.2:
501; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
502; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
503; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
504; GFX1132GISEL-NEXT:    s_endpgm
505entry:
506    %id.x = call i32 @llvm.amdgcn.workitem.id.x()
507    %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %id.x, i32 1)
508    store i32 %result, ptr addrspace(1) %out
509    ret void
510}
511
512define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) {
513; GFX8DAGISEL-LABEL: divergent_cfg:
514; GFX8DAGISEL:       ; %bb.0: ; %entry
515; GFX8DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
516; GFX8DAGISEL-NEXT:    ; implicit-def: $sgpr2
517; GFX8DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
518; GFX8DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
519; GFX8DAGISEL-NEXT:  ; %bb.1: ; %else
520; GFX8DAGISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
521; GFX8DAGISEL-NEXT:    ; implicit-def: $vgpr0
522; GFX8DAGISEL-NEXT:  ; %bb.2: ; %Flow
523; GFX8DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
524; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
525; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
526; GFX8DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
527; GFX8DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
528; GFX8DAGISEL-NEXT:  ; %bb.3: ; %if
529; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
530; GFX8DAGISEL-NEXT:    s_mov_b32 s6, -1
531; GFX8DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
532; GFX8DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
533; GFX8DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
534; GFX8DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
535; GFX8DAGISEL-NEXT:    s_min_u32 s6, s6, s8
536; GFX8DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
537; GFX8DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
538; GFX8DAGISEL-NEXT:  ; %bb.5:
539; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
540; GFX8DAGISEL-NEXT:  .LBB4_6: ; %endif
541; GFX8DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
542; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
543; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
544; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
545; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
546; GFX8DAGISEL-NEXT:    flat_store_dword v[2:3], v1
547; GFX8DAGISEL-NEXT:    s_endpgm
548;
549; GFX8GISEL-LABEL: divergent_cfg:
550; GFX8GISEL:       ; %bb.0: ; %entry
551; GFX8GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
552; GFX8GISEL-NEXT:    ; implicit-def: $sgpr6
553; GFX8GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
554; GFX8GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
555; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_2
556; GFX8GISEL-NEXT:  ; %bb.1: ; %else
557; GFX8GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
558; GFX8GISEL-NEXT:    ; implicit-def: $vgpr0
559; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
560; GFX8GISEL-NEXT:    s_mov_b32 s6, s2
561; GFX8GISEL-NEXT:  .LBB4_2: ; %Flow
562; GFX8GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
563; GFX8GISEL-NEXT:    s_cbranch_execz .LBB4_5
564; GFX8GISEL-NEXT:  ; %bb.3: ; %if
565; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
566; GFX8GISEL-NEXT:    s_mov_b32 s6, -1
567; GFX8GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
568; GFX8GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
569; GFX8GISEL-NEXT:    v_readlane_b32 s8, v0, s7
570; GFX8GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
571; GFX8GISEL-NEXT:    s_min_u32 s6, s6, s8
572; GFX8GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
573; GFX8GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
574; GFX8GISEL-NEXT:  .LBB4_5: ; %endif
575; GFX8GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
576; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
577; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s6
578; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
579; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
580; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
581; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
582; GFX8GISEL-NEXT:    s_endpgm
583;
584; GFX9DAGISEL-LABEL: divergent_cfg:
585; GFX9DAGISEL:       ; %bb.0: ; %entry
586; GFX9DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
587; GFX9DAGISEL-NEXT:    ; implicit-def: $sgpr2
588; GFX9DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
589; GFX9DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
590; GFX9DAGISEL-NEXT:  ; %bb.1: ; %else
591; GFX9DAGISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
592; GFX9DAGISEL-NEXT:    ; implicit-def: $vgpr0
593; GFX9DAGISEL-NEXT:  ; %bb.2: ; %Flow
594; GFX9DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
595; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
596; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
597; GFX9DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
598; GFX9DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
599; GFX9DAGISEL-NEXT:  ; %bb.3: ; %if
600; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
601; GFX9DAGISEL-NEXT:    s_mov_b32 s6, -1
602; GFX9DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
603; GFX9DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
604; GFX9DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
605; GFX9DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
606; GFX9DAGISEL-NEXT:    s_min_u32 s6, s6, s8
607; GFX9DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
608; GFX9DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
609; GFX9DAGISEL-NEXT:  ; %bb.5:
610; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
611; GFX9DAGISEL-NEXT:  .LBB4_6: ; %endif
612; GFX9DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
613; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
614; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
615; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
616; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
617; GFX9DAGISEL-NEXT:    s_endpgm
618;
619; GFX9GISEL-LABEL: divergent_cfg:
620; GFX9GISEL:       ; %bb.0: ; %entry
621; GFX9GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
622; GFX9GISEL-NEXT:    ; implicit-def: $sgpr6
623; GFX9GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
624; GFX9GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
625; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_2
626; GFX9GISEL-NEXT:  ; %bb.1: ; %else
627; GFX9GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
628; GFX9GISEL-NEXT:    ; implicit-def: $vgpr0
629; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
630; GFX9GISEL-NEXT:    s_mov_b32 s6, s2
631; GFX9GISEL-NEXT:  .LBB4_2: ; %Flow
632; GFX9GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
633; GFX9GISEL-NEXT:    s_cbranch_execz .LBB4_5
634; GFX9GISEL-NEXT:  ; %bb.3: ; %if
635; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
636; GFX9GISEL-NEXT:    s_mov_b32 s6, -1
637; GFX9GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
638; GFX9GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
639; GFX9GISEL-NEXT:    v_readlane_b32 s8, v0, s7
640; GFX9GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
641; GFX9GISEL-NEXT:    s_min_u32 s6, s6, s8
642; GFX9GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
643; GFX9GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
644; GFX9GISEL-NEXT:  .LBB4_5: ; %endif
645; GFX9GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
646; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
647; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s6
648; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
649; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
650; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
651; GFX9GISEL-NEXT:    s_endpgm
652;
653; GFX1064DAGISEL-LABEL: divergent_cfg:
654; GFX1064DAGISEL:       ; %bb.0: ; %entry
655; GFX1064DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc, 15, v0
656; GFX1064DAGISEL-NEXT:    ; implicit-def: $sgpr2
657; GFX1064DAGISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
658; GFX1064DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
659; GFX1064DAGISEL-NEXT:  ; %bb.1: ; %else
660; GFX1064DAGISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
661; GFX1064DAGISEL-NEXT:    ; implicit-def: $vgpr0
662; GFX1064DAGISEL-NEXT:  ; %bb.2: ; %Flow
663; GFX1064DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
664; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
665; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
666; GFX1064DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
667; GFX1064DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
668; GFX1064DAGISEL-NEXT:  ; %bb.3: ; %if
669; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
670; GFX1064DAGISEL-NEXT:    s_mov_b32 s6, -1
671; GFX1064DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
672; GFX1064DAGISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
673; GFX1064DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
674; GFX1064DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
675; GFX1064DAGISEL-NEXT:    s_min_u32 s6, s6, s8
676; GFX1064DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
677; GFX1064DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
678; GFX1064DAGISEL-NEXT:  ; %bb.5:
679; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
680; GFX1064DAGISEL-NEXT:  .LBB4_6: ; %endif
681; GFX1064DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
682; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
683; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
684; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
685; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
686; GFX1064DAGISEL-NEXT:    s_endpgm
687;
688; GFX1064GISEL-LABEL: divergent_cfg:
689; GFX1064GISEL:       ; %bb.0: ; %entry
690; GFX1064GISEL-NEXT:    v_cmp_le_u32_e32 vcc, 16, v0
691; GFX1064GISEL-NEXT:    ; implicit-def: $sgpr6
692; GFX1064GISEL-NEXT:    s_and_saveexec_b64 s[0:1], vcc
693; GFX1064GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
694; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_2
695; GFX1064GISEL-NEXT:  ; %bb.1: ; %else
696; GFX1064GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
697; GFX1064GISEL-NEXT:    ; implicit-def: $vgpr0
698; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
699; GFX1064GISEL-NEXT:    s_mov_b32 s6, s2
700; GFX1064GISEL-NEXT:  .LBB4_2: ; %Flow
701; GFX1064GISEL-NEXT:    s_andn2_saveexec_b64 s[0:1], s[0:1]
702; GFX1064GISEL-NEXT:    s_cbranch_execz .LBB4_5
703; GFX1064GISEL-NEXT:  ; %bb.3: ; %if
704; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
705; GFX1064GISEL-NEXT:    s_mov_b32 s6, -1
706; GFX1064GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
707; GFX1064GISEL-NEXT:    s_ff1_i32_b64 s7, s[2:3]
708; GFX1064GISEL-NEXT:    v_readlane_b32 s8, v0, s7
709; GFX1064GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
710; GFX1064GISEL-NEXT:    s_min_u32 s6, s6, s8
711; GFX1064GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
712; GFX1064GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
713; GFX1064GISEL-NEXT:  .LBB4_5: ; %endif
714; GFX1064GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
715; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
716; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s6
717; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
718; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
719; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
720; GFX1064GISEL-NEXT:    s_endpgm
721;
722; GFX1032DAGISEL-LABEL: divergent_cfg:
723; GFX1032DAGISEL:       ; %bb.0: ; %entry
724; GFX1032DAGISEL-NEXT:    v_cmp_lt_u32_e32 vcc_lo, 15, v0
725; GFX1032DAGISEL-NEXT:    ; implicit-def: $sgpr1
726; GFX1032DAGISEL-NEXT:    s_and_saveexec_b32 s0, vcc_lo
727; GFX1032DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
728; GFX1032DAGISEL-NEXT:  ; %bb.1: ; %else
729; GFX1032DAGISEL-NEXT:    s_load_dword s1, s[4:5], 0x2c
730; GFX1032DAGISEL-NEXT:    ; implicit-def: $vgpr0
731; GFX1032DAGISEL-NEXT:  ; %bb.2: ; %Flow
732; GFX1032DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
733; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
734; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
735; GFX1032DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
736; GFX1032DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
737; GFX1032DAGISEL-NEXT:  ; %bb.3: ; %if
738; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
739; GFX1032DAGISEL-NEXT:    s_mov_b32 s1, -1
740; GFX1032DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
741; GFX1032DAGISEL-NEXT:    s_ff1_i32_b32 s3, s2
742; GFX1032DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
743; GFX1032DAGISEL-NEXT:    s_bitset0_b32 s2, s3
744; GFX1032DAGISEL-NEXT:    s_min_u32 s1, s1, s6
745; GFX1032DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
746; GFX1032DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
747; GFX1032DAGISEL-NEXT:  ; %bb.5:
748; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
749; GFX1032DAGISEL-NEXT:  .LBB4_6: ; %endif
750; GFX1032DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
751; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
752; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
753; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
754; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
755; GFX1032DAGISEL-NEXT:    s_endpgm
756;
757; GFX1032GISEL-LABEL: divergent_cfg:
758; GFX1032GISEL:       ; %bb.0: ; %entry
759; GFX1032GISEL-NEXT:    v_cmp_le_u32_e32 vcc_lo, 16, v0
760; GFX1032GISEL-NEXT:    ; implicit-def: $sgpr0
761; GFX1032GISEL-NEXT:    s_and_saveexec_b32 s1, vcc_lo
762; GFX1032GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
763; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_2
764; GFX1032GISEL-NEXT:  ; %bb.1: ; %else
765; GFX1032GISEL-NEXT:    s_load_dword s0, s[4:5], 0x2c
766; GFX1032GISEL-NEXT:    ; implicit-def: $vgpr0
767; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
768; GFX1032GISEL-NEXT:    s_mov_b32 s0, s0
769; GFX1032GISEL-NEXT:  .LBB4_2: ; %Flow
770; GFX1032GISEL-NEXT:    s_andn2_saveexec_b32 s1, s1
771; GFX1032GISEL-NEXT:    s_cbranch_execz .LBB4_5
772; GFX1032GISEL-NEXT:  ; %bb.3: ; %if
773; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
774; GFX1032GISEL-NEXT:    s_mov_b32 s0, -1
775; GFX1032GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
776; GFX1032GISEL-NEXT:    s_ff1_i32_b32 s3, s2
777; GFX1032GISEL-NEXT:    v_readlane_b32 s6, v0, s3
778; GFX1032GISEL-NEXT:    s_bitset0_b32 s2, s3
779; GFX1032GISEL-NEXT:    s_min_u32 s0, s0, s6
780; GFX1032GISEL-NEXT:    s_cmp_lg_u32 s2, 0
781; GFX1032GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
782; GFX1032GISEL-NEXT:  .LBB4_5: ; %endif
783; GFX1032GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
784; GFX1032GISEL-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
785; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s0
786; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
787; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
788; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[2:3]
789; GFX1032GISEL-NEXT:    s_endpgm
790;
791; GFX1164DAGISEL-LABEL: divergent_cfg:
792; GFX1164DAGISEL:       ; %bb.0: ; %entry
793; GFX1164DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
794; GFX1164DAGISEL-NEXT:    s_mov_b64 s[0:1], exec
795; GFX1164DAGISEL-NEXT:    ; implicit-def: $sgpr2
796; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
797; GFX1164DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
798; GFX1164DAGISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
799; GFX1164DAGISEL-NEXT:  ; %bb.1: ; %else
800; GFX1164DAGISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
801; GFX1164DAGISEL-NEXT:    ; implicit-def: $vgpr0
802; GFX1164DAGISEL-NEXT:  ; %bb.2: ; %Flow
803; GFX1164DAGISEL-NEXT:    s_or_saveexec_b64 s[0:1], s[0:1]
804; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
805; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
806; GFX1164DAGISEL-NEXT:    s_xor_b64 exec, exec, s[0:1]
807; GFX1164DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
808; GFX1164DAGISEL-NEXT:  ; %bb.3: ; %if
809; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
810; GFX1164DAGISEL-NEXT:    s_mov_b32 s6, -1
811; GFX1164DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
812; GFX1164DAGISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
813; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
814; GFX1164DAGISEL-NEXT:    v_readlane_b32 s8, v0, s7
815; GFX1164DAGISEL-NEXT:    s_bitset0_b64 s[2:3], s7
816; GFX1164DAGISEL-NEXT:    s_min_u32 s6, s6, s8
817; GFX1164DAGISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
818; GFX1164DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
819; GFX1164DAGISEL-NEXT:  ; %bb.5:
820; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s6
821; GFX1164DAGISEL-NEXT:  .LBB4_6: ; %endif
822; GFX1164DAGISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
823; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
824; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
825; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
826; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
827; GFX1164DAGISEL-NEXT:    s_endpgm
828;
829; GFX1164GISEL-LABEL: divergent_cfg:
830; GFX1164GISEL:       ; %bb.0: ; %entry
831; GFX1164GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
832; GFX1164GISEL-NEXT:    s_mov_b64 s[0:1], exec
833; GFX1164GISEL-NEXT:    ; implicit-def: $sgpr6
834; GFX1164GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
835; GFX1164GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
836; GFX1164GISEL-NEXT:    s_xor_b64 s[0:1], exec, s[0:1]
837; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_2
838; GFX1164GISEL-NEXT:  ; %bb.1: ; %else
839; GFX1164GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x2c
840; GFX1164GISEL-NEXT:    ; implicit-def: $vgpr0
841; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
842; GFX1164GISEL-NEXT:    s_mov_b32 s6, s2
843; GFX1164GISEL-NEXT:  .LBB4_2: ; %Flow
844; GFX1164GISEL-NEXT:    s_and_not1_saveexec_b64 s[0:1], s[0:1]
845; GFX1164GISEL-NEXT:    s_cbranch_execz .LBB4_5
846; GFX1164GISEL-NEXT:  ; %bb.3: ; %if
847; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
848; GFX1164GISEL-NEXT:    s_mov_b32 s6, -1
849; GFX1164GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
850; GFX1164GISEL-NEXT:    s_ctz_i32_b64 s7, s[2:3]
851; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
852; GFX1164GISEL-NEXT:    v_readlane_b32 s8, v0, s7
853; GFX1164GISEL-NEXT:    s_bitset0_b64 s[2:3], s7
854; GFX1164GISEL-NEXT:    s_min_u32 s6, s6, s8
855; GFX1164GISEL-NEXT:    s_cmp_lg_u64 s[2:3], 0
856; GFX1164GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
857; GFX1164GISEL-NEXT:  .LBB4_5: ; %endif
858; GFX1164GISEL-NEXT:    s_or_b64 exec, exec, s[0:1]
859; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
860; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s6
861; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
862; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
863; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
864; GFX1164GISEL-NEXT:    s_endpgm
865;
866; GFX1132DAGISEL-LABEL: divergent_cfg:
867; GFX1132DAGISEL:       ; %bb.0: ; %entry
868; GFX1132DAGISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
869; GFX1132DAGISEL-NEXT:    s_mov_b32 s0, exec_lo
870; GFX1132DAGISEL-NEXT:    ; implicit-def: $sgpr1
871; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
872; GFX1132DAGISEL-NEXT:    v_cmpx_lt_u32_e32 15, v0
873; GFX1132DAGISEL-NEXT:    s_xor_b32 s0, exec_lo, s0
874; GFX1132DAGISEL-NEXT:  ; %bb.1: ; %else
875; GFX1132DAGISEL-NEXT:    s_load_b32 s1, s[4:5], 0x2c
876; GFX1132DAGISEL-NEXT:    ; implicit-def: $vgpr0
877; GFX1132DAGISEL-NEXT:  ; %bb.2: ; %Flow
878; GFX1132DAGISEL-NEXT:    s_or_saveexec_b32 s0, s0
879; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
880; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
881; GFX1132DAGISEL-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
882; GFX1132DAGISEL-NEXT:    s_cbranch_execz .LBB4_6
883; GFX1132DAGISEL-NEXT:  ; %bb.3: ; %if
884; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
885; GFX1132DAGISEL-NEXT:    s_mov_b32 s1, -1
886; GFX1132DAGISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
887; GFX1132DAGISEL-NEXT:    s_ctz_i32_b32 s3, s2
888; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
889; GFX1132DAGISEL-NEXT:    v_readlane_b32 s6, v0, s3
890; GFX1132DAGISEL-NEXT:    s_bitset0_b32 s2, s3
891; GFX1132DAGISEL-NEXT:    s_min_u32 s1, s1, s6
892; GFX1132DAGISEL-NEXT:    s_cmp_lg_u32 s2, 0
893; GFX1132DAGISEL-NEXT:    s_cbranch_scc1 .LBB4_4
894; GFX1132DAGISEL-NEXT:  ; %bb.5:
895; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
896; GFX1132DAGISEL-NEXT:  .LBB4_6: ; %endif
897; GFX1132DAGISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
898; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
899; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
900; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
901; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
902; GFX1132DAGISEL-NEXT:    s_endpgm
903;
904; GFX1132GISEL-LABEL: divergent_cfg:
905; GFX1132GISEL:       ; %bb.0: ; %entry
906; GFX1132GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
907; GFX1132GISEL-NEXT:    s_mov_b32 s1, exec_lo
908; GFX1132GISEL-NEXT:    ; implicit-def: $sgpr0
909; GFX1132GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
910; GFX1132GISEL-NEXT:    v_cmpx_le_u32_e32 16, v0
911; GFX1132GISEL-NEXT:    s_xor_b32 s1, exec_lo, s1
912; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_2
913; GFX1132GISEL-NEXT:  ; %bb.1: ; %else
914; GFX1132GISEL-NEXT:    s_load_b32 s0, s[4:5], 0x2c
915; GFX1132GISEL-NEXT:    ; implicit-def: $vgpr0
916; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
917; GFX1132GISEL-NEXT:    s_mov_b32 s0, s0
918; GFX1132GISEL-NEXT:  .LBB4_2: ; %Flow
919; GFX1132GISEL-NEXT:    s_and_not1_saveexec_b32 s1, s1
920; GFX1132GISEL-NEXT:    s_cbranch_execz .LBB4_5
921; GFX1132GISEL-NEXT:  ; %bb.3: ; %if
922; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
923; GFX1132GISEL-NEXT:    s_mov_b32 s0, -1
924; GFX1132GISEL-NEXT:  .LBB4_4: ; =>This Inner Loop Header: Depth=1
925; GFX1132GISEL-NEXT:    s_ctz_i32_b32 s3, s2
926; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
927; GFX1132GISEL-NEXT:    v_readlane_b32 s6, v0, s3
928; GFX1132GISEL-NEXT:    s_bitset0_b32 s2, s3
929; GFX1132GISEL-NEXT:    s_min_u32 s0, s0, s6
930; GFX1132GISEL-NEXT:    s_cmp_lg_u32 s2, 0
931; GFX1132GISEL-NEXT:    s_cbranch_scc1 .LBB4_4
932; GFX1132GISEL-NEXT:  .LBB4_5: ; %endif
933; GFX1132GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s1
934; GFX1132GISEL-NEXT:    s_load_b64 s[2:3], s[4:5], 0x24
935; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0
936; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
937; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[2:3]
938; GFX1132GISEL-NEXT:    s_endpgm
939entry:
940  %tid = call i32 @llvm.amdgcn.workitem.id.x()
941  %d_cmp = icmp ult i32 %tid, 16
942  br i1 %d_cmp, label %if, label %else
943
944if:
945  %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %tid, i32 1)
946  br label %endif
947
948else:
949  %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %in, i32 1)
950  br label %endif
951
952endif:
953  %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else]
954  store i32 %combine, ptr addrspace(1) %out
955  ret void
956}
957