xref: /llvm-project/llvm/test/CodeGen/AMDGPU/optimize-negated-cond.ll (revision eeac0ffaf46cf9f9b0f680b9940cc4b68a0286d8)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Kernel with a two-level loop nest and no 'ret' in the IR.
;   * Outer loop header %bb1 reloads the i32 at %arg1 and compares it with 0.
;   * Inner loop %bb2 -> (%bb3) -> %bb4 branches on that comparison, which is
;     computed outside the inner loop and therefore does not change while the
;     inner loop is running.
;   * %bb4 stores 0 to %arg1[%tmp6]; it returns to %bb1 when %tmp6 == 32 and
;     to %bb2 otherwise.
; NOTE(review): presumably this exercises the negated-condition optimization
; the test file is named after -- confirm against the pass under test.
; The assertion lines that follow are autogenerated (see the note at the top
; of the file); regenerate them with the update script instead of hand-editing.
4define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
5; GCN-LABEL: negated_cond:
6; GCN:       ; %bb.0: ; %bb
7; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
8; GCN-NEXT:    s_mov_b32 s7, 0xf000
9; GCN-NEXT:    s_mov_b32 s10, -1
10; GCN-NEXT:    s_mov_b32 s6, 0
11; GCN-NEXT:    s_mov_b32 s11, s7
12; GCN-NEXT:    s_waitcnt lgkmcnt(0)
13; GCN-NEXT:    s_mov_b32 s8, s4
14; GCN-NEXT:    s_mov_b32 s9, s5
15; GCN-NEXT:    v_mov_b32_e32 v0, 0
16; GCN-NEXT:    s_branch .LBB0_2
17; GCN-NEXT:  .LBB0_1: ; %loop.exit.guard
18; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
19; GCN-NEXT:    s_and_b64 vcc, exec, s[14:15]
20; GCN-NEXT:    s_cbranch_vccnz .LBB0_9
21; GCN-NEXT:  .LBB0_2: ; %bb1
22; GCN-NEXT:    ; =>This Loop Header: Depth=1
23; GCN-NEXT:    ; Child Loop BB0_4 Depth 2
24; GCN-NEXT:    buffer_load_dword v1, off, s[8:11], 0
25; GCN-NEXT:    s_waitcnt vmcnt(0)
26; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, v1
27; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
28; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
29; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
30; GCN-NEXT:    s_mov_b32 s12, s6
31; GCN-NEXT:    s_branch .LBB0_4
32; GCN-NEXT:  .LBB0_3: ; %Flow1
33; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
34; GCN-NEXT:    s_andn2_b64 vcc, exec, s[16:17]
35; GCN-NEXT:    s_cbranch_vccz .LBB0_1
36; GCN-NEXT:  .LBB0_4: ; %bb2
37; GCN-NEXT:    ; Parent Loop BB0_2 Depth=1
38; GCN-NEXT:    ; => This Inner Loop Header: Depth=2
39; GCN-NEXT:    s_and_b64 vcc, exec, s[0:1]
40; GCN-NEXT:    s_lshl_b32 s12, s12, 5
41; GCN-NEXT:    s_cbranch_vccz .LBB0_6
42; GCN-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=2
43; GCN-NEXT:    s_mov_b64 s[14:15], s[2:3]
44; GCN-NEXT:    s_branch .LBB0_7
45; GCN-NEXT:  .LBB0_6: ; %bb3
46; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
47; GCN-NEXT:    s_add_i32 s12, s12, 1
48; GCN-NEXT:    s_mov_b64 s[14:15], -1
49; GCN-NEXT:  .LBB0_7: ; %Flow
50; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
51; GCN-NEXT:    s_andn2_b64 vcc, exec, s[14:15]
52; GCN-NEXT:    s_mov_b64 s[16:17], -1
53; GCN-NEXT:    s_cbranch_vccnz .LBB0_3
54; GCN-NEXT:  ; %bb.8: ; %bb4
55; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
56; GCN-NEXT:    s_ashr_i32 s13, s12, 31
57; GCN-NEXT:    s_lshl_b64 s[16:17], s[12:13], 2
58; GCN-NEXT:    s_mov_b64 s[14:15], 0
59; GCN-NEXT:    v_mov_b32_e32 v1, s16
60; GCN-NEXT:    v_mov_b32_e32 v2, s17
61; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
62; GCN-NEXT:    s_cmp_eq_u32 s12, 32
63; GCN-NEXT:    s_cselect_b64 s[16:17], -1, 0
64; GCN-NEXT:    s_branch .LBB0_3
65; GCN-NEXT:  .LBB0_9: ; %DummyReturnBlock
66; GCN-NEXT:    s_endpgm
67bb:
68  br label %bb1
69
  ; Outer loop header: the load and compare are redone each time %bb4 branches
  ; back here.
70bb1:
71  %tmp1 = load i32, ptr addrspace(1) %arg1
72  %tmp2 = icmp eq i32 %tmp1, 0
73  br label %bb2
74
  ; Inner loop header: %tmp3 is 0 on entry from %bb1, otherwise the previous
  ; iteration's %tmp6. The branch condition %tmp2 is defined in %bb1.
75bb2:
76  %tmp3 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb4 ]
77  %tmp4 = shl i32 %tmp3, 5
78  br i1 %tmp2, label %bb3, label %bb4
79
  ; Taken only when the loaded value was 0: adds 1 to the shifted counter.
80bb3:
81  %tmp5 = add i32 %tmp4, 1
82  br label %bb4
83
  ; Merge point: %tmp6 is %tmp5 when coming through %bb3, else %tmp4. It is
  ; used as the store index and as the exit test for the inner loop.
84bb4:
85  %tmp6 = phi i32 [ %tmp5, %bb3 ], [ %tmp4, %bb2 ]
86  %gep = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tmp6
87  store i32 0, ptr addrspace(1) %gep
88  %tmp7 = icmp eq i32 %tmp6, 32
89  br i1 %tmp7, label %bb1, label %bb2
90}
91
; Kernel with a single loop whose branch condition is computed once, before
; the loop is entered.
;   * %bb2 loads the i32 at %arg1 and compares it with 0, then enters the loop.
;   * Loop header %bb4 branches on that comparison to %bb5 (true) or %bb6
;     (false); each side computes a different next value, merged in %bb7.
;   * %bb7 stores 0 to %arg1[%tmp7]; it exits to %bb3 (ret) when %tmp7 == 32
;     and loops back to %bb4 otherwise.
; NOTE(review): the name suggests the point of interest is that the users of
; the compare sit in blocks dominated by its definition -- confirm against the
; pass under test.
; The assertion lines that follow are autogenerated (see the note at the top
; of the file); regenerate them with the update script instead of hand-editing.
92define amdgpu_kernel void @negated_cond_dominated_blocks(ptr addrspace(1) %arg1) {
93; GCN-LABEL: negated_cond_dominated_blocks:
94; GCN:       ; %bb.0: ; %bb
95; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
96; GCN-NEXT:    s_waitcnt lgkmcnt(0)
97; GCN-NEXT:    s_load_dword s0, s[4:5], 0x0
98; GCN-NEXT:    s_mov_b32 s6, 0
99; GCN-NEXT:    s_mov_b32 s7, 0xf000
100; GCN-NEXT:    s_waitcnt lgkmcnt(0)
101; GCN-NEXT:    s_cmp_lg_u32 s0, 0
102; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
103; GCN-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
104; GCN-NEXT:    v_mov_b32_e32 v0, 0
105; GCN-NEXT:    s_mov_b32 s3, s6
106; GCN-NEXT:    s_branch .LBB1_2
107; GCN-NEXT:  .LBB1_1: ; %bb7
108; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
109; GCN-NEXT:    s_ashr_i32 s3, s2, 31
110; GCN-NEXT:    s_lshl_b64 s[8:9], s[2:3], 2
111; GCN-NEXT:    v_mov_b32_e32 v1, s8
112; GCN-NEXT:    v_mov_b32_e32 v2, s9
113; GCN-NEXT:    s_cmp_eq_u32 s2, 32
114; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
115; GCN-NEXT:    s_mov_b32 s3, s2
116; GCN-NEXT:    s_cbranch_scc1 .LBB1_6
117; GCN-NEXT:  .LBB1_2: ; %bb4
118; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
119; GCN-NEXT:    s_mov_b64 vcc, s[0:1]
120; GCN-NEXT:    s_cbranch_vccz .LBB1_4
121; GCN-NEXT:  ; %bb.3: ; %bb6
122; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
123; GCN-NEXT:    s_add_i32 s2, s3, 1
124; GCN-NEXT:    s_mov_b64 vcc, exec
125; GCN-NEXT:    s_cbranch_execnz .LBB1_1
126; GCN-NEXT:    s_branch .LBB1_5
127; GCN-NEXT:  .LBB1_4: ; in Loop: Header=BB1_2 Depth=1
128; GCN-NEXT:    ; implicit-def: $sgpr2
129; GCN-NEXT:    s_mov_b64 vcc, 0
130; GCN-NEXT:  .LBB1_5: ; %bb5
131; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
132; GCN-NEXT:    s_lshl_b32 s2, s3, 5
133; GCN-NEXT:    s_or_b32 s2, s2, 1
134; GCN-NEXT:    s_branch .LBB1_1
135; GCN-NEXT:  .LBB1_6: ; %bb3
136; GCN-NEXT:    s_endpgm
137bb:
138  br label %bb2
139
  ; Executed once: nothing branches back to %bb2, so the load and compare are
  ; outside the loop.
140bb2:
141  %tmp1 = load i32, ptr addrspace(1) %arg1
142  %tmp2 = icmp eq i32 %tmp1, 0
143  br label %bb4
144
  ; Loop exit.
145bb3:
146  ret void
147
  ; Loop header: %tmp3 is 0 on entry, otherwise the previous iteration's %tmp7.
148bb4:
149  %tmp3 = phi i32 [ 0, %bb2 ], [ %tmp7, %bb7 ]
150  %tmp4 = shl i32 %tmp3, 5
151  br i1 %tmp2, label %bb5, label %bb6
152
  ; Loaded value was 0: next value is (%tmp3 << 5) + 1.
153bb5:
154  %tmp5 = add i32 %tmp4, 1
155  br label %bb7
156
  ; Loaded value was non-zero: next value is %tmp3 + 1 (the shift is unused).
157bb6:
158  %tmp6 = add i32 %tmp3, 1
159  br label %bb7
160
  ; Merge point: store through the new index, then exit when it equals 32.
161bb7:
162  %tmp7 = phi i32 [ %tmp5, %bb5 ], [ %tmp6, %bb6 ]
163  %gep = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tmp7
164  store i32 0, ptr addrspace(1) %gep
165  %tmp8 = icmp eq i32 %tmp7, 32
166  br i1 %tmp8, label %bb3, label %bb4
167}
167}
168