; RUN: llc -amdgpu-scalar-ir-passes=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}select_undef_lhs:
; GCN: s_waitcnt
; GCN-NOT: v_cmp
; GCN-NOT: v_cndmask
; GCN-NEXT: s_setpc_b64
define float @select_undef_lhs(float %val, i1 %cond) {
  %sel = select i1 %cond, float undef, float %val
  ret float %sel
}

; GCN-LABEL: {{^}}select_undef_rhs:
; GCN: s_waitcnt
; GCN-NOT: v_cmp
; GCN-NOT: v_cndmask
; GCN-NEXT: s_setpc_b64
define float @select_undef_rhs(float %val, i1 %cond) {
  %sel = select i1 %cond, float %val, float undef
  ret float %sel
}

; GCN-LABEL: {{^}}select_undef_n1:
; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 1.0
; GCN: store_dword {{[^,]+}}, [[RES]]
define void @select_undef_n1(ptr addrspace(1) %a, i32 %c) {
  %cc = icmp eq i32 %c, 0
  %sel = select i1 %cc, float 1.000000e+00, float undef
  store float %sel, ptr addrspace(1) %a
  ret void
}

; GCN-LABEL: {{^}}select_undef_n2:
; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 1.0
; GCN: store_dword {{[^,]+}}, [[RES]]
define void @select_undef_n2(ptr addrspace(1) %a, i32 %c) {
  %cc = icmp eq i32 %c, 0
  %sel = select i1 %cc, float undef, float 1.000000e+00
  store float %sel, ptr addrspace(1) %a
  ret void
}

declare float @llvm.amdgcn.rcp.f32(float)


; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v6f32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v6f32(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <6 x float> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <6 x float>, ptr addrspace(3) undef
  %add = fadd <6 x float> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <6 x float> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v6i32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v6i32(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <6 x i32> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <6 x i32>, ptr addrspace(3) undef
  %add = add <6 x i32> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <6 x i32> %add, ptr addrspace(3) undef
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v5f32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v5f32(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <5 x float> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <5 x float>, ptr addrspace(3) undef
  %add = fadd <5 x float> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <5 x float> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v5i32:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v5i32(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <5 x i32> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <5 x i32>, ptr addrspace(3) undef
  %add = add <5 x i32> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <5 x i32> %add, ptr addrspace(3) undef
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v3f64:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v3f64(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <3 x double> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <3 x double>, ptr addrspace(3) %ptr
  %add = fadd <3 x double> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <3 x double> %add, ptr addrspace(3) %ptr
  ret void
}

; GCN-LABEL: {{^}}undef_v3i64:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v3i64(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <3 x i64> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <3 x i64>, ptr addrspace(3) %ptr
  %add = add <3 x i64> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <3 x i64> %add, ptr addrspace(3) %ptr
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v4f16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v4f16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <4 x half> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <4 x half>, ptr addrspace(3) %ptr
  %add = fadd <4 x half> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <4 x half> %add, ptr addrspace(3) %ptr
  ret void
}

; GCN-LABEL: {{^}}undef_v4i16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v4i16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <4 x i16> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <4 x i16>, ptr addrspace(3) %ptr
  %add = add <4 x i16> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <4 x i16> %add, ptr addrspace(3) %ptr
  ret void
}

; Make sure the vector undef isn't lowered into 0s.
; GCN-LABEL: {{^}}undef_v2f16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v2f16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <2 x half> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <2 x half>, ptr addrspace(3) %ptr
  %add = fadd <2 x half> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <2 x half> %add, ptr addrspace(3) %ptr
  ret void
}

; GCN-LABEL: {{^}}undef_v2i16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v2i16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <2 x i16> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <2 x i16>, ptr addrspace(3) %ptr
  %add = add <2 x i16> %load, %phi
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <2 x i16> %add, ptr addrspace(3) %ptr
  ret void
}

; We were expanding undef vectors into zero vectors. Optimizations
; would then see we used no elements of the vector, and reform the
; undef vector resulting in a combiner loop.
; GCN-LABEL: {{^}}inf_loop_undef_vector:
; GCN: s_waitcnt
; GCN-NEXT: v_mad_u64_u32
; GCN-NEXT: v_mul_lo_u32
; GCN-NEXT: v_mul_lo_u32
; GCN-NEXT: v_add3_u32
; GCN-NEXT: global_store_dwordx2
define void @inf_loop_undef_vector(<6 x float> %arg, float %arg1, i64 %arg2) {
  %i = insertelement <6 x float> %arg, float %arg1, i64 2
  %i3 = bitcast <6 x float> %i to <3 x i64>
  %i4 = extractelement <3 x i64> %i3, i64 0
  %i5 = extractelement <3 x i64> %i3, i64 1
  %i6 = mul i64 %i5, %arg2
  %i7 = add i64 %i6, %i4
  store volatile i64 %i7, ptr addrspace(1) undef, align 4
  ret void
}

; GCN-LABEL: {{^}}undef_bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi bfloat [ undef, %entry ], [ %add, %loop ]
  %load = load volatile bfloat, ptr addrspace(3) undef
  %bc.0 = bitcast bfloat %load to i16
  %bc.1 = bitcast bfloat %phi to i16
  %add.i = add i16 %bc.0, %bc.1
  %add = bitcast i16 %add.i to bfloat
  br i1 %cond, label %loop, label %ret

ret:
  store volatile bfloat %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v2bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v2bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <2 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <2 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <2 x bfloat> %load to <2 x i16>
  %bc.1 = bitcast <2 x bfloat> %phi to <2 x i16>
  %add.i = add <2 x i16> %bc.0, %bc.1
  %add = bitcast <2 x i16> %add.i to <2 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <2 x bfloat> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v3bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v3bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <3 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <3 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <3 x bfloat> %load to <3 x i16>
  %bc.1 = bitcast <3 x bfloat> %phi to <3 x i16>
  %add.i = add <3 x i16> %bc.0, %bc.1
  %add = bitcast <3 x i16> %add.i to <3 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <3 x bfloat> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v4bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v4bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <4 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <4 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <4 x bfloat> %load to <4 x i16>
  %bc.1 = bitcast <4 x bfloat> %phi to <4 x i16>
  %add.i = add <4 x i16> %bc.0, %bc.1
  %add = bitcast <4 x i16> %add.i to <4 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <4 x bfloat> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v6bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v6bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <6 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <6 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <6 x bfloat> %load to <6 x i16>
  %bc.1 = bitcast <6 x bfloat> %phi to <6 x i16>
  %add.i = add <6 x i16> %bc.0, %bc.1
  %add = bitcast <6 x i16> %add.i to <6 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <6 x bfloat> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v8bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v8bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <8 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <8 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <8 x bfloat> %load to <8 x i16>
  %bc.1 = bitcast <8 x bfloat> %phi to <8 x i16>
  %add.i = add <8 x i16> %bc.0, %bc.1
  %add = bitcast <8 x i16> %add.i to <8 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <8 x bfloat> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v16bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v16bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <16 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <16 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <16 x bfloat> %load to <16 x i16>
  %bc.1 = bitcast <16 x bfloat> %phi to <16 x i16>
  %add.i = add <16 x i16> %bc.0, %bc.1
  %add = bitcast <16 x i16> %add.i to <16 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <16 x bfloat> %add, ptr addrspace(3) undef
  ret void
}

; GCN-LABEL: {{^}}undef_v32bf16:
; GCN-NOT: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; GCN-NOT: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; GCN: s_cbranch_vccnz
define amdgpu_kernel void @undef_v32bf16(ptr addrspace(3) %ptr, i1 %cond) {
entry:
  br label %loop

loop:
  %phi = phi <32 x bfloat> [ undef, %entry ], [ %add, %loop ]
  %load = load volatile <32 x bfloat>, ptr addrspace(3) undef
  %bc.0 = bitcast <32 x bfloat> %load to <32 x i16>
  %bc.1 = bitcast <32 x bfloat> %phi to <32 x i16>
  %add.i = add <32 x i16> %bc.0, %bc.1
  %add = bitcast <32 x i16> %add.i to <32 x bfloat>
  br i1 %cond, label %loop, label %ret

ret:
  store volatile <32 x bfloat> %add, ptr addrspace(3) undef
  ret void
}