; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
3
4; Make sure the branch targets are correct after lowering llvm.amdgcn.if
5
; Divergent "if" with the then-block (%if.true) listed before the join block
; (%endif) in IR order. The expected code masks inactive lanes with
; s_and_saveexec_b64 and makes s_cbranch_execz jump forward to the %endif
; label (.LBB0_2) — i.e. the skip branch targets the join, not the then-block.
6define i32 @divergent_if_swap_brtarget_order0(i32 %value) {
7; CHECK-LABEL: divergent_if_swap_brtarget_order0:
8; CHECK:       ; %bb.0: ; %entry
9; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
11; CHECK-NEXT:    ; implicit-def: $vgpr0
12; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
13; CHECK-NEXT:    s_cbranch_execz .LBB0_2
14; CHECK-NEXT:  ; %bb.1: ; %if.true
15; CHECK-NEXT:    global_load_dword v0, v[0:1], off glc
16; CHECK-NEXT:    s_waitcnt vmcnt(0)
17; CHECK-NEXT:  .LBB0_2: ; %endif
18; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
19; CHECK-NEXT:    s_setpc_b64 s[30:31]
20entry:
  ; %value is a per-lane argument, so this compare is divergent.
21  %c = icmp ne i32 %value, 0
22  br i1 %c, label %if.true, label %endif
23
24if.true:
  ; Volatile load keeps the then-block from being optimized away.
25  %val = load volatile i32, ptr addrspace(1) undef
26  br label %endif
27
28endif:
  ; Join: loaded value on the taken path, undef on the skipped path.
29  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
30  ret i32 %v
31}
32
; Same test as divergent_if_swap_brtarget_order0, but with %endif written
; before %if.true in the IR. The emitted sequence is identical (modulo label
; numbers): source block order must not change which label the execz branch
; targets.
33define i32 @divergent_if_swap_brtarget_order1(i32 %value) {
34; CHECK-LABEL: divergent_if_swap_brtarget_order1:
35; CHECK:       ; %bb.0: ; %entry
36; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
38; CHECK-NEXT:    ; implicit-def: $vgpr0
39; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
40; CHECK-NEXT:    s_cbranch_execz .LBB1_2
41; CHECK-NEXT:  ; %bb.1: ; %if.true
42; CHECK-NEXT:    global_load_dword v0, v[0:1], off glc
43; CHECK-NEXT:    s_waitcnt vmcnt(0)
44; CHECK-NEXT:  .LBB1_2: ; %endif
45; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
46; CHECK-NEXT:    s_setpc_b64 s[30:31]
47entry:
48  %c = icmp ne i32 %value, 0
49  br i1 %c, label %if.true, label %endif
50
; %endif deliberately appears before %if.true here — the only difference
; from the order0 variant.
51endif:
52  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
53  ret i32 %v
54
55if.true:
  ; Volatile load keeps the then-block from being optimized away.
56  %val = load volatile i32, ptr addrspace(1) undef
57  br label %endif
58}
59
60; Make sure and 1 is inserted on llvm.amdgcn.if
; The branch condition is a trunc of an i32 argument, so codegen must mask
; the low bit (v_and_b32_e32 v0, 1, v0) before the compare against zero —
; the "and 1" inserted on llvm.amdgcn.if lowering.
61define i32 @divergent_if_nonboolean_condition0(i32 %value) {
62; CHECK-LABEL: divergent_if_nonboolean_condition0:
63; CHECK:       ; %bb.0: ; %entry
64; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
66; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
67; CHECK-NEXT:    ; implicit-def: $vgpr0
68; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
69; CHECK-NEXT:    s_cbranch_execz .LBB2_2
70; CHECK-NEXT:  ; %bb.1: ; %if.true
71; CHECK-NEXT:    global_load_dword v0, v[0:1], off glc
72; CHECK-NEXT:    s_waitcnt vmcnt(0)
73; CHECK-NEXT:  .LBB2_2: ; %endif
74; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
75; CHECK-NEXT:    s_setpc_b64 s[30:31]
76entry:
  ; Non-boolean source for the branch: only bit 0 of %value is meaningful.
77  %c = trunc i32 %value to i1
78  br i1 %c, label %if.true, label %endif
79
80if.true:
  ; Volatile load keeps the then-block from being optimized away.
81  %val = load volatile i32, ptr addrspace(1) undef
82  br label %endif
83
84endif:
85  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
86  ret i32 %v
87}
88
89; Make sure and 1 is inserted on llvm.amdgcn.if
; Like divergent_if_nonboolean_condition0, but the i32 feeding the trunc is
; loaded from memory (global_load_dword) rather than passed as an argument;
; the low bit must still be masked with v_and_b32 before the compare.
90define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) {
91; CHECK-LABEL: divergent_if_nonboolean_condition1:
92; CHECK:       ; %bb.0: ; %entry
93; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
94; CHECK-NEXT:    global_load_dword v0, v[0:1], off
95; CHECK-NEXT:    s_waitcnt vmcnt(0)
96; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
97; CHECK-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
98; CHECK-NEXT:    ; implicit-def: $vgpr0
99; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], vcc
100; CHECK-NEXT:    s_cbranch_execz .LBB3_2
101; CHECK-NEXT:  ; %bb.1: ; %if.true
102; CHECK-NEXT:    global_load_dword v0, v[0:1], off glc
103; CHECK-NEXT:    s_waitcnt vmcnt(0)
104; CHECK-NEXT:  .LBB3_2: ; %endif
105; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
106; CHECK-NEXT:    s_setpc_b64 s[30:31]
107entry:
  ; Condition value comes from a (divergent) global load, then trunc to i1.
108  %value = load i32, ptr addrspace(1) %ptr
109  %c = trunc i32 %value to i1
110  br i1 %c, label %if.true, label %endif
111
112if.true:
  ; Volatile load keeps the then-block from being optimized away.
113  %val = load volatile i32, ptr addrspace(1) undef
114  br label %endif
115
116endif:
117  %v = phi i32 [ %val, %if.true ], [ undef, %entry ]
118  ret i32 %v
119}
120
121@external_constant = external addrspace(4) constant i32, align 4
122@const.ptr = external addrspace(4) constant ptr, align 4
123
124; Make sure this case compiles. G_ICMP was mis-mapped due to having
125; the result register class constrained by llvm.amdgcn.if lowering.
; Compile-only regression test: G_ICMP's result bank was mis-mapped when its
; result register class got constrained by llvm.amdgcn.if lowering. The
; expected output uses scalar compare/branch (s_cmp_lg_u32 / s_cbranch_scc*)
; for these conditions rather than a VCC-based form.
126define void @constrained_if_register_class() {
127; CHECK-LABEL: constrained_if_register_class:
128; CHECK:       ; %bb.0: ; %bb
129; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; CHECK-NEXT:    s_getpc_b64 s[4:5]
131; CHECK-NEXT:    s_add_u32 s4, s4, external_constant@gotpcrel32@lo+4
132; CHECK-NEXT:    s_addc_u32 s5, s5, external_constant@gotpcrel32@hi+12
133; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
134; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
135; CHECK-NEXT:    s_load_dword s4, s[4:5], 0x0
136; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
137; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
138; CHECK-NEXT:    s_cbranch_scc0 .LBB4_2
139; CHECK-NEXT:  .LBB4_1: ; %bb12
140; CHECK-NEXT:    s_setpc_b64 s[30:31]
141; CHECK-NEXT:  .LBB4_2: ; %bb2
142; CHECK-NEXT:    s_getpc_b64 s[4:5]
143; CHECK-NEXT:    s_add_u32 s4, s4, const.ptr@gotpcrel32@lo+4
144; CHECK-NEXT:    s_addc_u32 s5, s5, const.ptr@gotpcrel32@hi+12
145; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
146; CHECK-NEXT:    v_mov_b32_e32 v0, 0
147; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
148; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
149; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
150; CHECK-NEXT:    global_load_dword v0, v0, s[4:5]
151; CHECK-NEXT:    s_mov_b32 s4, -1
152; CHECK-NEXT:    s_waitcnt vmcnt(0)
153; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
154; CHECK-NEXT:    s_cbranch_vccnz .LBB4_4
155; CHECK-NEXT:  ; %bb.3: ; %bb7
156; CHECK-NEXT:    s_mov_b32 s4, 0
157; CHECK-NEXT:  .LBB4_4: ; %bb8
158; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
159; CHECK-NEXT:    s_cbranch_scc1 .LBB4_1
160; CHECK-NEXT:  ; %bb.5: ; %bb11
161; CHECK-NEXT:    v_mov_b32_e32 v0, 4.0
162; CHECK-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
163; CHECK-NEXT:    s_waitcnt vmcnt(0)
164; CHECK-NEXT:    s_setpc_b64 s[30:31]
165bb:
  ; Value loaded from constant address space feeds the first branch condition.
166  %tmp = load i32, ptr addrspace(4) @external_constant
167  %tmp1 = icmp ne i32 %tmp, 0
168  br i1 %tmp1, label %bb12, label %bb2
169
170bb2:
  ; Float compared against 1.0 selects between %bb7 and %bb8.
171  %ptr = load ptr, ptr addrspace(4) @const.ptr
172  %tmp4 = load float, ptr %ptr, align 4
173  %tmp5 = fcmp olt float %tmp4, 1.0
  ; NOTE(review): no-op "or ..., false" — presumably left over from the
  ; original (reduced) reproducer; keep it, the test shape depends on it.
174  %tmp6 = or i1 %tmp5, false
175  br i1 %tmp6, label %bb8, label %bb7
176
177bb7:
178  br label %bb8
179
180bb8:
  ; %tmp9 is 0 only when %bb7 was taken; that selects the store path %bb11.
181  %tmp9 = phi i32 [ 0, %bb7 ], [ -1, %bb2 ]
182  %tmp10 = icmp eq i32 %tmp9, 0
183  br i1 %tmp10, label %bb11, label %bb12
184
185bb11:
186  store float 4.0, ptr addrspace(5) undef, align 4
187  br label %bb12
188
189bb12:
190  ret void
191}
192
; Divergent loop with a conditional break. The structurizer introduces a
; %Flow block; the expected code accumulates the exited-lane mask in s[0:1]
; and keeps iterating via "s_andn2_b64 exec, exec, s[0:1]" until
; s_cbranch_execz sees that every lane has left the loop.
193define amdgpu_kernel void @break_loop(i32 %arg) {
194; CHECK-LABEL: break_loop:
195; CHECK:       ; %bb.0: ; %bb
196; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x0
197; CHECK-NEXT:    ; implicit-def: $sgpr2_sgpr3
198; CHECK-NEXT:    ; implicit-def: $vgpr1
199; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
200; CHECK-NEXT:    v_subrev_u32_e32 v0, s0, v0
201; CHECK-NEXT:    s_mov_b64 s[0:1], 0
202; CHECK-NEXT:    s_branch .LBB5_3
203; CHECK-NEXT:  .LBB5_1: ; %bb4
204; CHECK-NEXT:    ; in Loop: Header=BB5_3 Depth=1
205; CHECK-NEXT:    global_load_dword v2, v[0:1], off glc
206; CHECK-NEXT:    s_waitcnt vmcnt(0)
207; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
208; CHECK-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v2
209; CHECK-NEXT:    s_and_b64 s[4:5], exec, vcc
210; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
211; CHECK-NEXT:  .LBB5_2: ; %Flow
212; CHECK-NEXT:    ; in Loop: Header=BB5_3 Depth=1
213; CHECK-NEXT:    s_and_b64 s[4:5], exec, s[2:3]
214; CHECK-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
215; CHECK-NEXT:    s_andn2_b64 exec, exec, s[0:1]
216; CHECK-NEXT:    s_cbranch_execz .LBB5_5
217; CHECK-NEXT:  .LBB5_3: ; %bb1
218; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
219; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
220; CHECK-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
221; CHECK-NEXT:    s_and_b64 s[4:5], exec, -1
222; CHECK-NEXT:    v_cmp_le_i32_e32 vcc, 0, v1
223; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], s[4:5]
224; CHECK-NEXT:    s_cbranch_vccz .LBB5_1
225; CHECK-NEXT:  ; %bb.4: ; in Loop: Header=BB5_3 Depth=1
226; CHECK-NEXT:    ; implicit-def: $vgpr1
227; CHECK-NEXT:    s_branch .LBB5_2
228; CHECK-NEXT:  .LBB5_5: ; %bb9
229; CHECK-NEXT:    s_endpgm
230bb:
  ; %tmp = workitem.id.x - %arg is per-lane, making the bb4 exit divergent.
231  %id = call i32 @llvm.amdgcn.workitem.id.x()
232  %tmp = sub i32 %id, %arg
233  br label %bb1
234
235bb1:
  ; Loop header: counter starts undef, increments each iteration; stays in
  ; the loop (falls through to %bb4) only while the incremented value is < 0.
236  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
237  %lsr.iv.next = add i32 %lsr.iv, 1
238  %cmp0 = icmp slt i32 %lsr.iv.next, 0
239  br i1 %cmp0, label %bb4, label %bb9
240
241bb4:
  ; Latch with the divergent break: continue to %bb1 while %tmp < loaded
  ; value, otherwise this lane exits to %bb9.
242  %load = load volatile i32, ptr addrspace(1) undef, align 4
243  %cmp1 = icmp slt i32 %tmp, %load
244  br i1 %cmp1, label %bb1, label %bb9
245
246bb9:
247  ret void
248}
249
250declare i32 @llvm.amdgcn.workitem.id.x()
251