xref: /llvm-project/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll (revision 2501ae58e3bb9a70d279a56d7b3a0ed70a8a852c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
4; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s
5
6; A test with a divergent unreachable block and a uniform return block. The
7; compiler needs to create a region that includes them so that
8; StructurizeCFG correctly transforms the CFG, and then SI Annotate Control
9; Flow does not fail during annotation.
10
11define void @my_func(i32 %0) { ; Reduced CFG: one returning block (sw.bb) and two blocks ending in unreachable, reached through three nested switches.
12; IR-LABEL: define void @my_func(
13; IR-SAME: i32 [[TMP0:%.*]]) {
14; IR-NEXT:  entry:
15; IR-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) null, align 8
16; IR-NEXT:    br label [[NODEBLOCK:%.*]]
17; IR:       NodeBlock:
18; IR-NEXT:    [[PIVOT:%.*]] = icmp sge i32 [[TMP1]], 1
19; IR-NEXT:    br i1 [[PIVOT]], label [[LEAFBLOCK1:%.*]], label [[FLOW:%.*]]
20; IR:       LeafBlock1:
21; IR-NEXT:    [[SWITCHLEAF2:%.*]] = icmp ne i32 [[TMP1]], 1
22; IR-NEXT:    br label [[FLOW]]
23; IR:       Flow:
24; IR-NEXT:    [[TMP2:%.*]] = phi i1 [ [[SWITCHLEAF2]], [[LEAFBLOCK1]] ], [ false, [[NODEBLOCK]] ]
25; IR-NEXT:    [[TMP3:%.*]] = phi i1 [ false, [[LEAFBLOCK1]] ], [ true, [[NODEBLOCK]] ]
26; IR-NEXT:    br i1 [[TMP3]], label [[LEAFBLOCK:%.*]], label [[FLOW11:%.*]]
27; IR:       LeafBlock:
28; IR-NEXT:    [[SWITCHLEAF:%.*]] = icmp eq i32 [[TMP1]], 0
29; IR-NEXT:    br i1 [[SWITCHLEAF]], label [[SW_BB2:%.*]], label [[FLOW12:%.*]]
30; IR:       Flow11:
31; IR-NEXT:    [[TMP4:%.*]] = phi i1 [ [[TMP9:%.*]], [[FLOW12]] ], [ false, [[FLOW]] ]
32; IR-NEXT:    [[TMP5:%.*]] = phi i1 [ [[TMP10:%.*]], [[FLOW12]] ], [ [[TMP2]], [[FLOW]] ]
33; IR-NEXT:    [[TMP6:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP5]])
34; IR-NEXT:    [[TMP7:%.*]] = extractvalue { i1, i64 } [[TMP6]], 0
35; IR-NEXT:    [[TMP8:%.*]] = extractvalue { i1, i64 } [[TMP6]], 1
36; IR-NEXT:    br i1 [[TMP7]], label [[DO_BODY:%.*]], label [[FLOW17:%.*]]
37; IR:       sw.bb2:
38; IR-NEXT:    br label [[NODEBLOCK7:%.*]]
39; IR:       Flow12:
40; IR-NEXT:    [[TMP9]] = phi i1 [ [[TMP24:%.*]], [[FLOW15:%.*]] ], [ false, [[LEAFBLOCK]] ]
41; IR-NEXT:    [[TMP10]] = phi i1 [ [[TMP25:%.*]], [[FLOW15]] ], [ true, [[LEAFBLOCK]] ]
42; IR-NEXT:    br label [[FLOW11]]
43; IR:       NodeBlock7:
44; IR-NEXT:    [[PIVOT8:%.*]] = icmp sge i32 [[TMP0]], 2
45; IR-NEXT:    [[TMP11:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[PIVOT8]])
46; IR-NEXT:    [[TMP12:%.*]] = extractvalue { i1, i64 } [[TMP11]], 0
47; IR-NEXT:    [[TMP13:%.*]] = extractvalue { i1, i64 } [[TMP11]], 1
48; IR-NEXT:    br i1 [[TMP12]], label [[LEAFBLOCK5:%.*]], label [[FLOW13:%.*]]
49; IR:       LeafBlock5:
50; IR-NEXT:    [[SWITCHLEAF6:%.*]] = icmp eq i32 [[TMP0]], 2
51; IR-NEXT:    br label [[FLOW13]]
52; IR:       Flow13:
53; IR-NEXT:    [[TMP14:%.*]] = phi i1 [ true, [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ]
54; IR-NEXT:    [[TMP15:%.*]] = phi i1 [ [[SWITCHLEAF6]], [[LEAFBLOCK5]] ], [ false, [[NODEBLOCK7]] ]
55; IR-NEXT:    [[TMP16:%.*]] = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 [[TMP13]])
56; IR-NEXT:    [[TMP17:%.*]] = extractvalue { i1, i64 } [[TMP16]], 0
57; IR-NEXT:    [[TMP18:%.*]] = extractvalue { i1, i64 } [[TMP16]], 1
58; IR-NEXT:    br i1 [[TMP17]], label [[LEAFBLOCK3:%.*]], label [[FLOW14:%.*]]
59; IR:       LeafBlock3:
60; IR-NEXT:    [[SWITCHLEAF4:%.*]] = icmp eq i32 [[TMP0]], 0
61; IR-NEXT:    [[SWITCHLEAF4_INV:%.*]] = xor i1 [[SWITCHLEAF4]], true
62; IR-NEXT:    br label [[FLOW14]]
63; IR:       Flow14:
64; IR-NEXT:    [[TMP19:%.*]] = phi i1 [ [[SWITCHLEAF4_INV]], [[LEAFBLOCK3]] ], [ [[TMP14]], [[FLOW13]] ]
65; IR-NEXT:    [[TMP20:%.*]] = phi i1 [ [[SWITCHLEAF4]], [[LEAFBLOCK3]] ], [ [[TMP15]], [[FLOW13]] ]
66; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP18]])
67; IR-NEXT:    [[TMP21:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP20]])
68; IR-NEXT:    [[TMP22:%.*]] = extractvalue { i1, i64 } [[TMP21]], 0
69; IR-NEXT:    [[TMP23:%.*]] = extractvalue { i1, i64 } [[TMP21]], 1
70; IR-NEXT:    br i1 [[TMP22]], label [[LAND_LHS_TRUE_I:%.*]], label [[FLOW15]]
71; IR:       land.lhs.true.i:
72; IR-NEXT:    br label [[LEAFBLOCK9:%.*]]
73; IR:       Flow15:
74; IR-NEXT:    [[TMP24]] = phi i1 [ [[TMP29:%.*]], [[FLOW16:%.*]] ], [ false, [[FLOW14]] ]
75; IR-NEXT:    [[TMP25]] = phi i1 [ [[TMP30:%.*]], [[FLOW16]] ], [ [[TMP19]], [[FLOW14]] ]
76; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP23]])
77; IR-NEXT:    br label [[FLOW12]]
78; IR:       LeafBlock9:
79; IR-NEXT:    [[SWITCHLEAF10:%.*]] = icmp sgt i32 [[TMP0]], 1
80; IR-NEXT:    [[TMP26:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[SWITCHLEAF10]])
81; IR-NEXT:    [[TMP27:%.*]] = extractvalue { i1, i64 } [[TMP26]], 0
82; IR-NEXT:    [[TMP28:%.*]] = extractvalue { i1, i64 } [[TMP26]], 1
83; IR-NEXT:    br i1 [[TMP27]], label [[DO_BODY_I_I_I_I:%.*]], label [[FLOW16]]
84; IR:       do.body.i.i.i.i:
85; IR-NEXT:    tail call fastcc void null()
86; IR-NEXT:    br label [[FLOW16]]
87; IR:       Flow16:
88; IR-NEXT:    [[TMP29]] = phi i1 [ true, [[DO_BODY_I_I_I_I]] ], [ false, [[LEAFBLOCK9]] ]
89; IR-NEXT:    [[TMP30]] = phi i1 [ false, [[DO_BODY_I_I_I_I]] ], [ true, [[LEAFBLOCK9]] ]
90; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP28]])
91; IR-NEXT:    br label [[FLOW15]]
92; IR:       do.body:
93; IR-NEXT:    tail call fastcc void null()
94; IR-NEXT:    br label [[FLOW17]]
95; IR:       Flow17:
96; IR-NEXT:    [[TMP31:%.*]] = phi i1 [ true, [[DO_BODY]] ], [ [[TMP4]], [[FLOW11]] ]
97; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]])
98; IR-NEXT:    [[TMP32:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[TMP31]])
99; IR-NEXT:    [[TMP33:%.*]] = extractvalue { i1, i64 } [[TMP32]], 0
100; IR-NEXT:    [[TMP34:%.*]] = extractvalue { i1, i64 } [[TMP32]], 1
101; IR-NEXT:    br i1 [[TMP33]], label [[UNIFIEDUNREACHABLEBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]]
102; IR:       UnifiedUnreachableBlock:
103; IR-NEXT:    call void @llvm.amdgcn.unreachable()
104; IR-NEXT:    br label [[UNIFIEDRETURNBLOCK]]
105; IR:       UnifiedReturnBlock:
106; IR-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP34]])
107; IR-NEXT:    ret void
108;
109; GCN-LABEL: my_func:
110; GCN:       ; %bb.0: ; %entry
111; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GCN-NEXT:    s_mov_b64 s[4:5], 0
113; GCN-NEXT:    s_load_dword s10, s[4:5], 0x0
114; GCN-NEXT:    s_mov_b64 s[8:9], -1
115; GCN-NEXT:    s_waitcnt lgkmcnt(0)
116; GCN-NEXT:    s_cmp_lt_i32 s10, 1
117; GCN-NEXT:    s_mov_b64 s[6:7], 0
118; GCN-NEXT:    s_cbranch_scc1 .LBB0_7
119; GCN-NEXT:  ; %bb.1: ; %LeafBlock1
120; GCN-NEXT:    s_cmp_lg_u32 s10, 1
121; GCN-NEXT:    s_cselect_b64 s[6:7], -1, 0
122; GCN-NEXT:    s_mov_b64 vcc, exec
123; GCN-NEXT:    s_cbranch_execz .LBB0_8
124; GCN-NEXT:  .LBB0_2: ; %Flow11
125; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
126; GCN-NEXT:  .LBB0_3: ; %do.body
127; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
128; GCN-NEXT:  .LBB0_4: ; %Flow17
129; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
130; GCN-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
131; GCN-NEXT:  ; %bb.5: ; %UnifiedUnreachableBlock
132; GCN-NEXT:    ; divergent unreachable
133; GCN-NEXT:  ; %bb.6: ; %UnifiedReturnBlock
134; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
135; GCN-NEXT:    s_setpc_b64 s[30:31]
136; GCN-NEXT:  .LBB0_7: ; %Flow
137; GCN-NEXT:    s_andn2_b64 vcc, exec, s[8:9]
138; GCN-NEXT:    s_cbranch_vccnz .LBB0_2
139; GCN-NEXT:  .LBB0_8: ; %LeafBlock
140; GCN-NEXT:    s_cmp_eq_u32 s10, 0
141; GCN-NEXT:    s_cbranch_scc1 .LBB0_10
142; GCN-NEXT:  ; %bb.9:
143; GCN-NEXT:    s_mov_b64 s[6:7], -1
144; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
145; GCN-NEXT:    s_cbranch_execnz .LBB0_3
146; GCN-NEXT:    s_branch .LBB0_4
147; GCN-NEXT:  .LBB0_10: ; %NodeBlock7
148; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
149; GCN-NEXT:    s_mov_b64 s[10:11], 0
150; GCN-NEXT:    s_mov_b64 s[6:7], 0
151; GCN-NEXT:    s_and_saveexec_b64 s[4:5], vcc
152; GCN-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
153; GCN-NEXT:  ; %bb.11: ; %LeafBlock5
154; GCN-NEXT:    s_mov_b64 s[6:7], exec
155; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 2, v0
156; GCN-NEXT:    s_and_b64 s[10:11], vcc, exec
157; GCN-NEXT:  ; %bb.12: ; %Flow13
158; GCN-NEXT:    s_andn2_saveexec_b64 s[8:9], s[4:5]
159; GCN-NEXT:  ; %bb.13: ; %LeafBlock3
160; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
161; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
162; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
163; GCN-NEXT:    s_andn2_b64 s[10:11], s[10:11], exec
164; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
165; GCN-NEXT:    s_and_b64 s[12:13], vcc, exec
166; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[4:5]
167; GCN-NEXT:    s_or_b64 s[10:11], s[10:11], s[12:13]
168; GCN-NEXT:  ; %bb.14: ; %Flow14
169; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
170; GCN-NEXT:    s_mov_b64 s[4:5], 0
171; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[10:11]
172; GCN-NEXT:    s_cbranch_execz .LBB0_18
173; GCN-NEXT:  ; %bb.15: ; %LeafBlock9
174; GCN-NEXT:    v_cmp_lt_i32_e32 vcc, 1, v0
175; GCN-NEXT:    s_mov_b64 s[10:11], -1
176; GCN-NEXT:    s_and_saveexec_b64 s[12:13], vcc
177; GCN-NEXT:  ; %bb.16: ; %do.body.i.i.i.i
178; GCN-NEXT:    s_mov_b64 s[4:5], exec
179; GCN-NEXT:    s_xor_b64 s[10:11], exec, -1
180; GCN-NEXT:  ; %bb.17: ; %Flow16
181; GCN-NEXT:    s_or_b64 exec, exec, s[12:13]
182; GCN-NEXT:    s_and_b64 s[4:5], s[4:5], exec
183; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
184; GCN-NEXT:    s_and_b64 s[10:11], s[10:11], exec
185; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[10:11]
186; GCN-NEXT:  .LBB0_18: ; %Flow15
187; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
188; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[6:7]
189; GCN-NEXT:    s_cbranch_execnz .LBB0_3
190; GCN-NEXT:    s_branch .LBB0_4
191entry: ; First dispatch: on a value loaded from ptr addrspace(4) null (the machine-code checks above expect a scalar s_load_dword for it).
192  %1 = load i32, ptr addrspace(4) null, align 8
193  switch i32 %1, label %do.body [
194  i32 1, label %sw.bb
195  i32 0, label %sw.bb2
196  ]
197
198sw.bb: ; The only block in the input that returns.
199  ret void
200
201sw.bb2: ; Second dispatch, on the argument %0 (compared via v0 in the machine-code checks above): 0 and 2 continue, anything else goes to do.body.
202  switch i32 %0, label %do.body [
203  i32 0, label %land.lhs.true.i
204  i32 2, label %land.lhs.true.i
205  ]
206
207land.lhs.true.i: ; Third dispatch, again on %0: 0 and 1 go to do.body, anything else to do.body.i.i.i.i.
208  switch i32 %0, label %do.body.i.i.i.i [
209  i32 0, label %do.body
210  i32 1, label %do.body
211  ]
212
213do.body.i.i.i.i: ; Calls through a null callee and ends in unreachable; reached only from land.lhs.true.i.
214  tail call fastcc void null()
215  unreachable
216
217do.body: ; Calls through a null callee and ends in unreachable; a target of all three switches.
218  tail call fastcc void null()
219  unreachable
220
221}
222