xref: /llvm-project/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
3; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s
4
; Intrinsics called by @kernel: the trap issued immediately before each
; unreachable, and the workitem id x read in the entry block.
5declare void @llvm.trap()
6declare i32 @llvm.amdgcn.workitem.id.x()
7
8define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
9; This used to bypass the structurization process because the structurizer is unable to
10; handle a CFG with multiple exits. This should be correctly structurized.
;
; The function has three exits: the single return in if.end6 and two
; trap-then-unreachable blocks (cond.false and cond.false.i8). The expected
; assembly below shows one %UnifiedUnreachableBlock and one %UnifiedReturnBlock.
11; CHECK-LABEL: kernel:
12; CHECK:       ; %bb.0: ; %entry
13; CHECK-NEXT:    s_load_dword s0, s[8:9], 0x10
14; CHECK-NEXT:    s_load_dword s10, s[8:9], 0x0
15; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
16; CHECK-NEXT:    s_cmpk_lg_i32 s0, 0x100
17; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
18; CHECK-NEXT:  ; %bb.1: ; %if.else
19; CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, 10, v0
20; CHECK-NEXT:    s_mov_b64 s[4:5], 0
21; CHECK-NEXT:    s_mov_b64 s[2:3], 0
22; CHECK-NEXT:    s_mov_b64 s[0:1], 0
23; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
24; CHECK-NEXT:    s_cbranch_execz .LBB0_5
25; CHECK-NEXT:  ; %bb.2: ; %if.then3
26; CHECK-NEXT:    s_cmp_lg_u32 s10, 0
27; CHECK-NEXT:    s_cbranch_scc1 .LBB0_14
28; CHECK-NEXT:  ; %bb.3:
29; CHECK-NEXT:    s_mov_b64 s[0:1], -1
30; CHECK-NEXT:  .LBB0_4: ; %Flow3
31; CHECK-NEXT:    s_and_b64 s[0:1], s[0:1], exec
32; CHECK-NEXT:    s_and_b64 s[2:3], s[2:3], exec
33; CHECK-NEXT:  .LBB0_5: ; %Flow2
34; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
35; CHECK-NEXT:    s_and_b64 vcc, exec, s[4:5]
36; CHECK-NEXT:    s_cbranch_vccz .LBB0_8
37; CHECK-NEXT:    s_branch .LBB0_7
38; CHECK-NEXT:  .LBB0_6:
39; CHECK-NEXT:    s_mov_b64 s[2:3], 0
40; CHECK-NEXT:    s_mov_b64 s[0:1], 0
41; CHECK-NEXT:    s_cbranch_execz .LBB0_8
42; CHECK-NEXT:  .LBB0_7: ; %if.then
43; CHECK-NEXT:    s_cmp_lg_u32 s10, 0
44; CHECK-NEXT:    s_mov_b64 s[0:1], -1
45; CHECK-NEXT:    s_cbranch_scc1 .LBB0_13
46; CHECK-NEXT:  .LBB0_8: ; %Flow4
47; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
48; CHECK-NEXT:  .LBB0_9: ; %UnifiedUnreachableBlock
49; CHECK-NEXT:    ; divergent unreachable
50; CHECK-NEXT:  .LBB0_10: ; %Flow6
51; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
52; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], s[0:1]
53; CHECK-NEXT:    s_cbranch_execz .LBB0_12
54; CHECK-NEXT:  ; %bb.11: ; %if.end6.sink.split
55; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x8
56; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
57; CHECK-NEXT:    v_mov_b32_e32 v1, s10
58; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
59; CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
60; CHECK-NEXT:  .LBB0_12: ; %UnifiedReturnBlock
61; CHECK-NEXT:    s_endpgm
62; CHECK-NEXT:  .LBB0_13: ; %cond.false
63; CHECK-NEXT:    s_mov_b64 s[0:1], 0
64; CHECK-NEXT:    s_or_b64 s[2:3], s[2:3], exec
65; CHECK-NEXT:    s_trap 2
66; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], s[2:3]
67; CHECK-NEXT:    s_cbranch_execnz .LBB0_9
68; CHECK-NEXT:    s_branch .LBB0_10
69; CHECK-NEXT:  .LBB0_14: ; %cond.false.i8
70; CHECK-NEXT:    s_mov_b64 s[2:3], -1
71; CHECK-NEXT:    s_trap 2
72; CHECK-NEXT:    s_branch .LBB0_4
73
74
; Outer branch: compares the kernel argument %n with 256. %tid is read here
; because both the if.else comparison and the final store address use it.
75entry:
76  %tid = call i32 @llvm.amdgcn.workitem.id.x()
77  %cmp = icmp eq i32 %n, 256
78  br i1 %cmp, label %if.then, label %if.else
79
; %n == 256 path: store when %a == 0, otherwise trap.
80if.then:
81  %cmp1 = icmp eq i32 %a, 0
82  br i1 %cmp1, label %if.end6.sink.split, label %cond.false
83
; First trap/unreachable exit.
84cond.false:
85  call void @llvm.trap()
86  unreachable
87
; %n != 256 path: only workitems with id below 10 (unsigned compare) continue
; to if.then3; the rest go straight to the return block.
88if.else:
89  %cmp2 = icmp ult i32 %tid, 10
90  br i1 %cmp2, label %if.then3, label %if.end6
91
; Same %a == 0 test as if.then, but with its own trap block.
92if.then3:
93  %cmp1.i7 = icmp eq i32 %a, 0
94  br i1 %cmp1.i7, label %if.end6.sink.split, label %cond.false.i8
95
; Second trap/unreachable exit.
96cond.false.i8:
97  call void @llvm.trap()
98  unreachable
99
; Shared store target of both %a == 0 edges: writes %a to element %tid of %x.
100if.end6.sink.split:
101  %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid
102  store i32 %a, ptr addrspace(1) %x1, align 4
103  br label %if.end6
104
; The only return in the function.
105if.end6:
106  ret void
107}
108;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
109; UNIFY: {{.*}}
110