; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/dagcombine-select.ll (revision 758444ca3e7163a1504eeced3383af861d01d761)
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s

; and(%y, select(%c, 0, -1)): the checks require that the stored value is an
; s_cselect_b32 result copied to a VGPR, and that no v_and_b32 is emitted
; between that copy and the store.
; GCN-LABEL: {{^}}select_and1:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and1(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = and i32 %y, %s
  store i32 %a, ptr addrspace(1) %p, align 4
  ret void
}

; Same as select_and1 but with the 'and' operands commuted (select on the
; left-hand side). The checks again require an s_cselect_b32 feeding the store
; and no v_and_b32 in between.
; GCN-LABEL: {{^}}select_and2:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and2(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = and i32 %s, %y
  store i32 %a, ptr addrspace(1) %p, align 4
  ret void
}

; Variant of select_and1 with the select arms swapped (-1 when %c is true,
; 0 otherwise). The checks require an s_cselect_b32 feeding the store and no
; v_and_b32 in between.
; GCN-LABEL: {{^}}select_and3:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_and_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_and3(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 -1, i32 0
  %a = and i32 %y, %s
  store i32 %a, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x i32> version of the and/select pattern: select(%c, zeroinitializer,
; all-ones) and'ed with %y. The checks expect four s_cselect_b32 whose second
; source is the immediate 0, four VGPR copies (taken in reverse order of the
; selects), no v_and_b32, and one dwordx4 store of the copied range.
; GCN-LABEL: {{^}}select_and_v4:
; GCN:     s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, 0
; GCN:     s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, 0
; GCN:     s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, 0
; GCN:     s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, 0
; GCN:     v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN:     v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN:     v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN:     v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN-NOT: v_and_b32
; GCN:     global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @select_and_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = and <4 x i32> %s, %y
  store <4 x i32> %a, ptr addrspace(1) %p, align 32
  ret void
}

; or(%y, select(%c, 0, -1)): the checks require that the stored value is an
; s_cselect_b32 result copied to a VGPR, and that no v_or_b32 is emitted
; between that copy and the store.
; GCN-LABEL: {{^}}select_or1:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or1(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = or i32 %y, %s
  store i32 %a, ptr addrspace(1) %p, align 4
  ret void
}

; Same as select_or1 but with the 'or' operands commuted (select on the
; left-hand side). The checks again require an s_cselect_b32 feeding the store
; and no v_or_b32 in between.
; GCN-LABEL: {{^}}select_or2:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or2(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 0, i32 -1
  %a = or i32 %s, %y
  store i32 %a, ptr addrspace(1) %p, align 4
  ret void
}

; Variant of select_or1 with the select arms swapped (-1 when %c is true,
; 0 otherwise). The checks require an s_cselect_b32 feeding the store and no
; v_or_b32 in between.
; GCN-LABEL: {{^}}select_or3:
; GCN:     s_cselect_b32 [[SEL:s[0-9]+]], s{{[0-9]+}},
; GCN:     v_mov_b32_e32 [[VSEL:v[0-9]+]], [[SEL]]
; GCN-NOT: v_or_b32
; GCN:     store_dword v{{[0-9]+}}, [[VSEL]], s{{\[[0-9]+:[0-9]+\]}}
define amdgpu_kernel void @select_or3(ptr addrspace(1) %p, i32 %x, i32 %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, i32 -1, i32 0
  %a = or i32 %y, %s
  store i32 %a, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x i32> version of the or/select pattern: select(%c, zeroinitializer,
; all-ones) or'ed with %y. The checks expect four s_cselect_b32 whose second
; source is the immediate -1, no v_or_b32 after them, four VGPR copies (taken
; in reverse order of the selects), and one dwordx4 store of the copied range.
; GCN-LABEL: {{^}}select_or_v4:
; GCN:     s_cselect_b32 s[[SEL0:[0-9]+]], s{{[0-9]+}}, -1
; GCN:     s_cselect_b32 s[[SEL1:[0-9]+]], s{{[0-9]+}}, -1
; GCN:     s_cselect_b32 s[[SEL2:[0-9]+]], s{{[0-9]+}}, -1
; GCN:     s_cselect_b32 s[[SEL3:[0-9]+]], s{{[0-9]+}}, -1
; GCN-NOT: v_or_b32
; GCN:     v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN:     v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN:     v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN:     v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN:     global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @select_or_v4(ptr addrspace(1) %p, i32 %x, <4 x i32> %y) {
  %c = icmp slt i32 %x, 11
  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
  %a = or <4 x i32> %s, %y
  store <4 x i32> %a, ptr addrspace(1) %p, align 32
  ret void
}

; sub of a constant and a select between two constants. Evaluated per arm:
; 5 - (-4) = 9 (true arm) and 5 - 3 = 2 (false arm); the check expects a single
; s_cselect_b32 between those two immediates.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i32 -4, i32 3
  %bo = sub i32 5, %sel
  store i32 %bo, ptr addrspace(1) %p, align 4
  ret void
}

; i16 version of the constant sub/select case: 5 - (-4) = 9 and 5 - 3 = 2.
; The check expects the same s_cselect_b32 between 9 and 2 as the i32 case.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16:
; GCN: s_cselect_b32 s{{[0-9]+}}, 9, 2
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i16 -4, i16 3
  %bo = sub i16 5, %sel
  store i16 %bo, ptr addrspace(1) %p, align 2
  ret void
}

; i16 sub/select case where both per-arm results are negative: 1 - 4 = -3 and
; 1 - 3000 = -2999, which is 0xf449 as a 16-bit value. The check expects an
; s_cselect_b32 whose second source is the literal 0xf449 and whose first
; source is the same SGPR as its destination.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_i16_neg:
; GCN: s_cselect_b32 s[[SGPR:[0-9]+]], s[[SGPR]], 0xf449
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_i16_neg(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i16 4, i16 3000
  %bo = sub i16 1, %sel
  store i16 %bo, ptr addrspace(1) %p, align 2
  ret void
}

; <2 x i16> sub/select case. Per-arm results are <9, 5> (true) and <2, 6>
; (false); with element 0 in the low half these pack to 0x50009 and 0x60002.
; The check expects the first literal to be materialized with s_mov_b32 and
; then selected against the second by one s_cselect_b32.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v2i16:
; GCN-DAG: s_mov_b32 [[T:s[0-9]+]], 0x50009
; GCN:     s_cselect_b32 s{{[0-9]+}}, [[T]], 0x60002
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v2i16(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, <2 x i16> <i16 -4, i16 2>, <2 x i16> <i16 3, i16 1>
  %bo = sub <2 x i16> <i16 5, i16 7>, %sel
  store <2 x i16> %bo, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x i32> sub/select case. Per-arm results are <9, 5, 6, 7> (true) and
; <2, 6, 10, 14> (false). The checks expect one s_cselect_b32 per element
; (last element first), VGPR copies in reverse order of the selects, and one
; dwordx4 store of the copied range.
; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants_v4i32:
; GCN:     s_cselect_b32 s[[SEL0:[0-9]+]], 7, 14
; GCN:     s_cselect_b32 s[[SEL1:[0-9]+]], 6, 10
; GCN:     s_cselect_b32 s[[SEL2:[0-9]+]], 5, 6
; GCN:     s_cselect_b32 s[[SEL3:[0-9]+]], 9, 2
; GCN:     v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN:     v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN:     v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN:     v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN:     global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @sel_constants_sub_constant_sel_constants_v4i32(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, <4 x i32> <i32 -4, i32 2, i32 3, i32 4>, <4 x i32> <i32 3, i32 1, i32 -1, i32 -3>
  %bo = sub <4 x i32> <i32 5, i32 7, i32 9, i32 11>, %sel
  store <4 x i32> %bo, ptr addrspace(1) %p, align 32
  ret void
}

; i64 sdiv of a constant by a select between two constants: 120 / 121 = 0
; (true arm) and 120 / 23 = 5 (false arm). The check matches a 32-bit
; s_cselect_b32 between the immediates 0 and 5.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @sdiv_constant_sel_constants_i64(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i64 121, i64 23
  %bo = sdiv i64 120, %sel
  store i64 %bo, ptr addrspace(1) %p, align 8
  ret void
}

; i32 sdiv of a constant by a select between two constants: 184 / 7 = 26
; (true arm) and 184 / 23 = 8 (false arm). The check expects an s_cselect_b32
; between those two immediates.
; GCN-LABEL: {{^}}sdiv_constant_sel_constants_i32:
; GCN: s_cselect_b32 s{{[0-9]+}}, 26, 8
define amdgpu_kernel void @sdiv_constant_sel_constants_i32(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i32 7, i32 23
  %bo = sdiv i32 184, %sel
  store i32 %bo, ptr addrspace(1) %p, align 8
  ret void
}

; i64 udiv of a constant by a select between two constants. Interpreted as
; unsigned, -4 is far larger than 120, so the true arm gives 0; the false arm
; gives 120 / 23 = 5. The check matches a 32-bit s_cselect_b32 between 0 and 5.
; GCN-LABEL: {{^}}udiv_constant_sel_constants_i64:
; GCN: s_cselect_b32 s{{[0-9]+}}, 0, 5
define amdgpu_kernel void @udiv_constant_sel_constants_i64(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i64 -4, i64 23
  %bo = udiv i64 120, %sel
  store i64 %bo, ptr addrspace(1) %p, align 8
  ret void
}

; i64 srem of a constant by a select between two constants: 33 srem 34 = 33
; (true arm) and 33 srem 15 = 3 (false arm). The check matches a 32-bit
; s_cselect_b32 between the immediates 33 and 3.
; GCN-LABEL: {{^}}srem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @srem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i64 34, i64 15
  %bo = srem i64 33, %sel
  store i64 %bo, ptr addrspace(1) %p, align 8
  ret void
}

; i64 urem of a constant by a select between two constants: 33 urem 34 = 33
; (true arm) and 33 urem 15 = 3 (false arm). The check matches a 32-bit
; s_cselect_b32 between the immediates 33 and 3.
; GCN-LABEL: {{^}}urem_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 33, 3
define amdgpu_kernel void @urem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i64 34, i64 15
  %bo = urem i64 33, %sel
  store i64 %bo, ptr addrspace(1) %p, align 8
  ret void
}

; shl of a constant by a select between two constant shift amounts:
; 1 << 2 = 4 (true arm) and 1 << 3 = 8 (false arm). The check expects an
; s_cselect_b32 between those two immediates.
; GCN-LABEL: {{^}}shl_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 4, 8
define amdgpu_kernel void @shl_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = shl i32 1, %sel
  store i32 %bo, ptr addrspace(1) %p, align 4
  ret void
}

; lshr of a constant by a select between two constant shift amounts:
; 64 >> 2 = 16 (true arm) and 64 >> 3 = 8 (false arm). The check expects an
; s_cselect_b32 between those two immediates.
; GCN-LABEL: {{^}}lshr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 16, 8
define amdgpu_kernel void @lshr_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = lshr i32 64, %sel
  store i32 %bo, ptr addrspace(1) %p, align 4
  ret void
}

; ashr of a constant by a select between two constant shift amounts:
; 128 >> 2 = 32 (true arm) and 128 >> 3 = 16 (false arm). The check expects an
; s_cselect_b32 between those two immediates.
; GCN-LABEL: {{^}}ashr_constant_sel_constants:
; GCN: s_cselect_b32 s{{[0-9]+}}, 32, 16
define amdgpu_kernel void @ashr_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, i32 2, i32 3
  %bo = ashr i32 128, %sel
  store i32 %bo, ptr addrspace(1) %p, align 4
  ret void
}

; fsub of a constant and a select between two float constants:
; -1.0 - (-2.0) = 1.0 (true arm) and -1.0 - 3.0 = -4.0 (false arm). The check
; expects a v_cndmask_b32_e64 with the false-arm value listed first.
; GCN-LABEL: {{^}}fsub_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, -4.0, 1.0,
define amdgpu_kernel void @fsub_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, float -2.0, float 3.0
  %bo = fsub float -1.0, %sel
  store float %bo, ptr addrspace(1) %p, align 4
  ret void
}

; half version of the fsub/select case: results are 1.0 (0x3c00 as half, true
; arm) and -4.0 (0xc400 as half, false arm). The checks currently expect both
; values to be moved into VGPRs before a v_cndmask_b32_e32 picks one of them.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_f16:
; TODO: it shall be possible to fold constants with OpSel
; GCN-DAG: v_mov_b32_e32 [[T:v[0-9]+]], 0x3c00
; GCN-DAG: v_mov_b32_e32 [[F:v[0-9]+]], 0xc400
; GCN:     v_cndmask_b32_e32 v{{[0-9]+}}, [[F]], [[T]],
define amdgpu_kernel void @fsub_constant_sel_constants_f16(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, half -2.0, half 3.0
  %bo = fsub half -1.0, %sel
  store half %bo, ptr addrspace(1) %p, align 2
  ret void
}

; <2 x half> fsub/select case. Per-arm results are <1.0, 5.0> (true), packed
; as 0x45003c00, and <0.0, -2.0> (false), packed as 0xc0000000, which the
; check spells as the 32-bit float constant -2.0 (same bit pattern). A single
; s_cselect_b32 between the two packed values is expected.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v2f16:
; GCN:     s_cselect_b32 s{{[0-9]+}}, 0x45003c00, -2.0
define amdgpu_kernel void @fsub_constant_sel_constants_v2f16(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, <2 x half> <half -2.0, half -3.0>, <2 x half> <half -1.0, half 4.0>
  %bo = fsub <2 x half> <half -1.0, half 2.0>, %sel
  store <2 x half> %bo, ptr addrspace(1) %p, align 4
  ret void
}

; <4 x float> fsub/select case. Per-arm results are <1.0, 5.0, 9.0, 13.0>
; (true) and <0.0, 2.0, 4.0, 6.0> (false). In the checks 13.0 = 0x41500000,
; 6.0 = 0x40c00000, 9.0 = 0x41100000 and 5.0 = 0x40a00000. One s_cselect_b32
; per element is expected (last element first; 13.0 is first materialized
; with s_mov_b32), followed by VGPR copies and one dwordx4 store.
; GCN-LABEL: {{^}}fsub_constant_sel_constants_v4f32:
; GCN:     s_mov_b32 [[T0:s[0-9]+]], 0x41500000
; GCN:     s_cselect_b32 s[[SEL0:[0-9]+]], [[T0]], 0x40c00000
; GCN:     s_cselect_b32 s[[SEL1:[0-9]+]], 0x41100000, 4.0
; GCN:     s_cselect_b32 s[[SEL2:[0-9]+]], 0x40a00000, 2.0
; GCN:     s_cselect_b32 s[[SEL3:[0-9]+]], 1.0, 0
; GCN:     v_mov_b32_e32 v[[V0:[0-9]+]], s[[SEL3]]
; GCN:     v_mov_b32_e32 v[[V1:[0-9]+]], s[[SEL2]]
; GCN:     v_mov_b32_e32 v[[V2:[0-9]+]], s[[SEL1]]
; GCN:     v_mov_b32_e32 v[[V3:[0-9]+]], s[[SEL0]]
; GCN:     global_store_dwordx4 v{{[0-9]+}}, v[[[V0]]:[[V3]]]
define amdgpu_kernel void @fsub_constant_sel_constants_v4f32(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, <4 x float> <float -2.0, float -3.0, float -4.0, float -5.0>, <4 x float> <float -1.0, float 0.0, float 1.0, float 2.0>
  %bo = fsub <4 x float> <float -1.0, float 2.0, float 5.0, float 8.0>, %sel
  store <4 x float> %bo, ptr addrspace(1) %p, align 32
  ret void
}

; fdiv of a constant by a select between two float constants:
; 8.0 / -4.0 = -2.0 (true arm) and 8.0 / 2.0 = 4.0 (false arm). The check
; expects a v_cndmask_b32_e64 with the false-arm value listed first.
; GCN-LABEL: {{^}}fdiv_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 4.0, -2.0,
define amdgpu_kernel void @fdiv_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, float -4.0, float 2.0
  %bo = fdiv float 8.0, %sel
  store float %bo, ptr addrspace(1) %p, align 4
  ret void
}

; frem of a constant by a select between two float constants:
; 5.0 frem -4.0 = 1.0 (true arm; the result takes the dividend's sign) and
; 5.0 frem 3.0 = 2.0 (false arm). The check expects a v_cndmask_b32_e64 with
; the false-arm value listed first.
; GCN-LABEL: {{^}}frem_constant_sel_constants:
; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2.0, 1.0,
define amdgpu_kernel void @frem_constant_sel_constants(ptr addrspace(1) %p, i1 %cond) {
  %sel = select i1 %cond, float -4.0, float 3.0
  %bo = frem float 5.0, %sel
  store float %bo, ptr addrspace(1) %p, align 4
  ret void
}
