; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; When the mask of lanes wanting to exit the loop on this iteration is not
; obviously already masked by exec (in this case, the xor with -1 inserted by
; control flow annotation), lower control flow must insert an S_AND_B64 with
; exec.
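;
; A sketch of the expected exit-mask update in the checks below (register
; numbers are simply whatever the allocator picks here):
;   s_and_b64 s[4:5], exec, vcc      ; mask the break condition by exec
;   s_or_b64  s[6:7], s[4:5], s[6:7] ; accumulate lanes that have exited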

define void @needs_and(i32 %arg) {
; GCN-LABEL: needs_and:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s10, 1
; GCN-NEXT:    s_mov_b64 s[6:7], 0
; GCN-NEXT:    s_branch .LBB0_2
; GCN-NEXT:  .LBB0_1: ; %endif
; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_and_b64 s[4:5], exec, vcc
; GCN-NEXT:    s_or_b64 s[6:7], s[4:5], s[6:7]
; GCN-NEXT:    s_add_i32 s10, s10, 1
; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB0_4
; GCN-NEXT:  .LBB0_2: ; %loop
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_gt_u32_e64 s[4:5], s10, v0
; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s10, v0
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
; GCN-NEXT:    s_cbranch_execz .LBB0_1
; GCN-NEXT:  ; %bb.3: ; %then
; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT:    s_nop 1
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT:    s_branch .LBB0_1
; GCN-NEXT:  .LBB0_4: ; %loopexit
; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  br i1 %tmp27, label %then, label %endif

then:                                             ; preds = %loop
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
  br label %endif

endif:                                            ; preds = %then, %loop
  br i1 %tmp27, label %loop, label %loopexit

loopexit:
  ret void
}

; When the mask of lanes wanting to exit the loop on this iteration is
; obviously already masked by exec (a V_CMP), lower control flow can omit the
; S_AND_B64 to avoid an unnecessary instruction.
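;
; A sketch of the expected update in the checks below: the V_CMP result in
; vcc is already exec-masked, so it is ORed straight into the exit mask with
; no intervening s_and_b64:
;   s_or_b64 s[4:5], vcc, s[4:5]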

define void @doesnt_need_and(i32 %arg) {
; GCN-LABEL: doesnt_need_and:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s6, 0
; GCN-NEXT:    s_mov_b64 s[4:5], 0
; GCN-NEXT:  .LBB1_1: ; %loop
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
; GCN-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT:    s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.2: ; %loopexit
; GCN-NEXT:    s_or_b64 exec, exec, s[4:5]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  %tmp23phi = phi i32 [ %tmp23, %loop ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
  br i1 %tmp27, label %loop, label %loopexit

loopexit:
  ret void
}

; Another case where the mask of lanes wanting to exit the loop is not masked
; by exec, because it is a function parameter.
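;
; A sketch of the expected sequence in the checks below: the i1 argument is
; materialized once in the entry block, then masked by exec on every
; iteration before being accumulated:
;   s_xor_b64 s[4:5], vcc, -1        ; invert the break condition once
;   s_and_b64 s[8:9], exec, s[4:5]   ; mask it by exec each iteration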

define void @break_cond_is_arg(i32 %arg, i1 %breakcond) {
; GCN-LABEL: break_cond_is_arg:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
; GCN-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GCN-NEXT:    s_mov_b32 s10, 1
; GCN-NEXT:    s_mov_b64 s[6:7], 0
; GCN-NEXT:    s_branch .LBB2_2
; GCN-NEXT:  .LBB2_1: ; %endif
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    s_or_b64 exec, exec, s[8:9]
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
; GCN-NEXT:    s_add_i32 s10, s10, 1
; GCN-NEXT:    s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_cbranch_execz .LBB2_4
; GCN-NEXT:  .LBB2_2: ; %loop
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, s10, v0
; GCN-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT:    s_cbranch_execz .LBB2_1
; GCN-NEXT:  ; %bb.3: ; %then
; GCN-NEXT:    ; in Loop: Header=BB2_2 Depth=1
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], s4
; GCN-NEXT:    s_branch .LBB2_1
; GCN-NEXT:  .LBB2_4: ; %loopexit
; GCN-NEXT:    s_or_b64 exec, exec, s[6:7]
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
entry:
  br label %loop

loop:
  %tmp23phi = phi i32 [ %tmp23, %endif ], [ 0, %entry ]
  %tmp23 = add nuw i32 %tmp23phi, 1
  %tmp27 = icmp ult i32 %arg, %tmp23
  br i1 %tmp27, label %then, label %endif

then:                                             ; preds = %loop
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float undef, ptr addrspace(8) undef, i32 0, i32 undef, i32 0)
  br label %endif

endif:                                            ; preds = %then, %loop
  br i1 %breakcond, label %loop, label %loopexit

loopexit:
  ret void
}

declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0

attributes #0 = { nounwind writeonly }