# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra -o - %s | FileCheck %s


# Cannot fold this without moving the def of %7 after the and.
---
name:            no_fold_andn2_select_condition_live_out_phi
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: no_fold_andn2_select_condition_live_out_phi
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   S_ENDPGM 0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[S_MOV_B64_]], implicit $exec
  ; CHECK-NEXT:   V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]].sub0
  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %3:vgpr_32, [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK-NEXT:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    %7:sreg_64_xexec = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
    ATOMIC_FENCE 4, 2
    %7:sreg_64_xexec = S_MOV_B64 0
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...

# It's OK to fold this, since the phi def is after the andn2 insert point.
---
name:            fold_andn2_select_condition_live_out_phi_reorder
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: fold_andn2_select_condition_live_out_phi_reorder
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 -1
  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   S_ENDPGM 0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]].sub0
  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %3:vgpr_32, [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, [[S_MOV_B64_]], implicit-def dead $scc
  ; CHECK-NEXT:   [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 0
  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK-NEXT:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    %7:sreg_64_xexec = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %7, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
    ATOMIC_FENCE 4, 2
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    %7:sreg_64_xexec = S_MOV_B64 0
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...
# Same as the first test, but the select condition lives in a physical
# register ($sgpr4_sgpr5, live into bb.2). It is redefined before the
# S_AND_B64, so the fold to S_ANDN2_B64 must not happen.
---
name:            no_fold_andn2_select_condition_live_out_phi_physreg
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: no_fold_andn2_select_condition_live_out_phi_physreg
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 -1
  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   S_ENDPGM 0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
  ; CHECK-NEXT:   V_CMP_NE_U32_e32 1, [[V_CNDMASK_B32_e64_]], implicit-def $vcc, implicit $exec
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]].sub0
  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %2:vgpr_32, [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK-NEXT:   $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK-NEXT:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    $sgpr4_sgpr5 = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2
    liveins: $sgpr4_sgpr5

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
    ATOMIC_FENCE 4, 2
    $sgpr4_sgpr5 = S_MOV_B64 0
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...
# Physical register variant of the reorder test: $sgpr4_sgpr5 is only
# redefined after the S_AND_B64, so folding to S_ANDN2_B64 is OK.
---
name:            fold_andn2_select_condition_live_out_phi_physreg_reorder
tracksRegLiveness: true
body:             |
  ; CHECK-LABEL: name: fold_andn2_select_condition_live_out_phi_physreg_reorder
  ; CHECK: bb.0:
  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 -1
  ; CHECK-NEXT:   undef [[V_MOV_B32_e32_:%[0-9]+]].sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
  ; CHECK-NEXT:   S_BRANCH %bb.2
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT:   S_ENDPGM 0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; CHECK-NEXT:   liveins: $sgpr4_sgpr5
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]].sub1:vreg_64 = COPY [[V_MOV_B32_e32_]].sub0
  ; CHECK-NEXT:   DS_WRITE_B64_gfx9 undef %2:vgpr_32, [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (store (s64), addrspace 3)
  ; CHECK-NEXT:   ATOMIC_FENCE 4, 2
  ; CHECK-NEXT:   $vcc = S_ANDN2_B64 $exec, $sgpr4_sgpr5, implicit-def dead $scc
  ; CHECK-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 0
  ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
  ; CHECK-NEXT:   S_BRANCH %bb.2
  bb.0:
    successors: %bb.2

    $sgpr4_sgpr5 = S_MOV_B64 -1
    undef %5.sub0:vreg_64 = V_MOV_B32_e32 0, implicit $exec
    S_BRANCH %bb.2

  bb.1:
    S_ENDPGM 0

  bb.2:
    successors: %bb.1, %bb.2
    liveins: $sgpr4_sgpr5

    %4:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr4_sgpr5, implicit $exec
    V_CMP_NE_U32_e32 1, %4, implicit-def $vcc, implicit $exec
    %5.sub1:vreg_64 = COPY %5.sub0
    DS_WRITE_B64_gfx9 undef %6:vgpr_32, %5, 0, 0, implicit $exec :: (store (s64), addrspace 3)
    ATOMIC_FENCE 4, 2
    $vcc = S_AND_B64 $exec, killed $vcc, implicit-def dead $scc
    $sgpr4_sgpr5 = S_MOV_B64 0
    S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
    S_BRANCH %bb.2

...