xref: /llvm-project/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
2; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-no-signed-zeros-fp-math < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
3
; fadd of a select whose two operands are both fabs of loaded values.
; The checks expect the select to operate on the raw loads and a single |abs|
; source modifier to be applied to the select result inside the v_add_f32.
4; GCN-LABEL: {{^}}add_select_fabs_fabs_f32:
5; GCN: buffer_load_dword [[X:v[0-9]+]]
6; GCN: buffer_load_dword [[Y:v[0-9]+]]
7; GCN: buffer_load_dword [[Z:v[0-9]+]]
8
9; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
10; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
11define amdgpu_kernel void @add_select_fabs_fabs_f32(i32 %c) #0 {
12  %x = load volatile float, ptr addrspace(1) undef
13  %y = load volatile float, ptr addrspace(1) undef
14  %z = load volatile float, ptr addrspace(1) undef
15  %cmp = icmp eq i32 %c, 0
16  %fabs.x = call float @llvm.fabs.f32(float %x)
17  %fabs.y = call float @llvm.fabs.f32(float %y)
18  %select = select i1 %cmp, float %fabs.x, float %fabs.y
19  %add = fadd float %select, %z
20  store float %add, ptr addrspace(1) undef
21  ret void
22}
23
; Same fabs/fabs select as above, but %fabs.x (the true operand) has a second
; user: another fadd. The checks still expect the select on the raw loads with
; an |abs| modifier on its result, and the extra add to use |X| directly.
24; GCN-LABEL: {{^}}add_select_multi_use_lhs_fabs_fabs_f32:
25; GCN: buffer_load_dword [[X:v[0-9]+]]
26; GCN: buffer_load_dword [[Y:v[0-9]+]]
27; GCN: buffer_load_dword [[Z:v[0-9]+]]
28; GCN: buffer_load_dword [[W:v[0-9]+]]
29
30; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
31; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
32; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[X]]|, [[W]]
33define amdgpu_kernel void @add_select_multi_use_lhs_fabs_fabs_f32(i32 %c) #0 {
34  %x = load volatile float, ptr addrspace(1) undef
35  %y = load volatile float, ptr addrspace(1) undef
36  %z = load volatile float, ptr addrspace(1) undef
37  %w = load volatile float, ptr addrspace(1) undef
38  %cmp = icmp eq i32 %c, 0
39  %fabs.x = call float @llvm.fabs.f32(float %x)
40  %fabs.y = call float @llvm.fabs.f32(float %y)
41  %select = select i1 %cmp, float %fabs.x, float %fabs.y
42  %add0 = fadd float %select, %z
43  %add1 = fadd float %fabs.x, %w
44  store volatile float %add0, ptr addrspace(1) undef
45  store volatile float %add1, ptr addrspace(1) undef
46  ret void
47}
48
; fabs/fabs select where %fabs.x is additionally stored to memory. A store
; cannot carry a source modifier, so the checks expect a v_and_b32 with
; 0x7fffffff to materialize |X| for the store, while the select/add pair
; still uses the |abs| modifier on the select result.
49; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fabs_fabs_f32:
50; GCN: buffer_load_dword [[X:v[0-9]+]]
51; GCN: buffer_load_dword [[Y:v[0-9]+]]
52; GCN: buffer_load_dword [[Z:v[0-9]+]]
53
54; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
55; GCN-DAG: v_add_f32_e64 [[ADD:v[0-9]+]], |[[SELECT]]|, [[Z]]
56; GCN-DAG: v_and_b32_e32 [[X_ABS:v[0-9]+]], 0x7fffffff, [[X]]
57
58; GCN: buffer_store_dword [[ADD]]
59; GCN: buffer_store_dword [[X_ABS]]
60define amdgpu_kernel void @add_select_multi_store_use_lhs_fabs_fabs_f32(i32 %c) #0 {
61  %x = load volatile float, ptr addrspace(1) undef
62  %y = load volatile float, ptr addrspace(1) undef
63  %z = load volatile float, ptr addrspace(1) undef
64  %cmp = icmp eq i32 %c, 0
65  %fabs.x = call float @llvm.fabs.f32(float %x)
66  %fabs.y = call float @llvm.fabs.f32(float %y)
67  %select = select i1 %cmp, float %fabs.x, float %fabs.y
68  %add0 = fadd float %select, %z
69  store volatile float %add0, ptr addrspace(1) undef
70  store volatile float %fabs.x, ptr addrspace(1) undef
71  ret void
72}
73
; Mirror of the multi-use lhs test: here %fabs.y (the false operand of the
; select) is the value with a second fadd user. The checks expect the extra
; add to use |Y| directly.
74; GCN-LABEL: {{^}}add_select_multi_use_rhs_fabs_fabs_f32:
75; GCN: buffer_load_dword [[X:v[0-9]+]]
76; GCN: buffer_load_dword [[Y:v[0-9]+]]
77; GCN: buffer_load_dword [[Z:v[0-9]+]]
78; GCN: buffer_load_dword [[W:v[0-9]+]]
79
80; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
81; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[Z]]
82; GCN-DAG: v_add_f32_e64 v{{[0-9]+}}, |[[Y]]|, [[W]]
83define amdgpu_kernel void @add_select_multi_use_rhs_fabs_fabs_f32(i32 %c) #0 {
84  %x = load volatile float, ptr addrspace(1) undef
85  %y = load volatile float, ptr addrspace(1) undef
86  %z = load volatile float, ptr addrspace(1) undef
87  %w = load volatile float, ptr addrspace(1) undef
88  %cmp = icmp eq i32 %c, 0
89  %fabs.x = call float @llvm.fabs.f32(float %x)
90  %fabs.y = call float @llvm.fabs.f32(float %y)
91  %select = select i1 %cmp, float %fabs.x, float %fabs.y
92  %add0 = fadd float %select, %z
93  %add1 = fadd float %fabs.y, %w
94  store volatile float %add0, ptr addrspace(1) undef
95  store volatile float %add1, ptr addrspace(1) undef
96  ret void
97}
98
; Only the true operand of the select is a fabs; the false operand is a plain
; loaded value. The checks expect the |abs| modifier to stay on the X operand
; of the v_cndmask_b32_e64 and the add to be a plain v_add_f32_e32.
99; GCN-LABEL: {{^}}add_select_fabs_var_f32:
100; GCN: buffer_load_dword [[X:v[0-9]+]]
101; GCN: buffer_load_dword [[Y:v[0-9]+]]
102; GCN: buffer_load_dword [[Z:v[0-9]+]]
103
104; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], |[[X]]|,
105; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
106define amdgpu_kernel void @add_select_fabs_var_f32(i32 %c) #0 {
107  %x = load volatile float, ptr addrspace(1) undef
108  %y = load volatile float, ptr addrspace(1) undef
109  %z = load volatile float, ptr addrspace(1) undef
110  %cmp = icmp eq i32 %c, 0
111  %fabs.x = call float @llvm.fabs.f32(float %x)
112  %select = select i1 %cmp, float %fabs.x, float %y
113  %add = fadd float %select, %z
114  store volatile float %add, ptr addrspace(1) undef
115  ret void
116}
117
; select(fabs x, -1.0): a negative constant cannot be expressed as the fabs of
; anything, so the checks expect -1.0 as an operand of the v_cndmask_b32_e64
; and the |abs| modifier kept on X there.
118; GCN-LABEL: {{^}}add_select_fabs_negk_f32:
119; GCN: buffer_load_dword [[X:v[0-9]+]]
120; GCN: buffer_load_dword [[Y:v[0-9]+]]
121
122; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|,
123; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
124define amdgpu_kernel void @add_select_fabs_negk_f32(i32 %c) #0 {
125  %x = load volatile float, ptr addrspace(1) undef
126  %y = load volatile float, ptr addrspace(1) undef
127  %cmp = icmp eq i32 %c, 0
128  %fabs = call float @llvm.fabs.f32(float %x)
129  %select = select i1 %cmp, float %fabs, float -1.0
130  %add = fadd float %select, %y
131  store volatile float %add, ptr addrspace(1) undef
132  ret void
133}
134
135; FIXME: fabs should fold away
; fabs applied to a select of two negative constants (-2.0 / -1.0). The checks
; record the current output: the constants stay negative on the v_cndmask and
; the |abs| modifier remains on the add instead of being folded into them.
136; GCN-LABEL: {{^}}add_select_fabs_negk_negk_f32:
137; GCN: buffer_load_dword [[X:v[0-9]+]]
138
139; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
140; GCN: v_add_f32_e64 v{{[0-9]+}}, |[[SELECT]]|, [[X]]
141define amdgpu_kernel void @add_select_fabs_negk_negk_f32(i32 %c) #0 {
142  %x = load volatile float, ptr addrspace(1) undef
143  %cmp = icmp eq i32 %c, 0
144  %select = select i1 %cmp, float -2.0, float -1.0
145  %fabs = call float @llvm.fabs.f32(float %select)
146  %add = fadd float %fabs, %x
147  store volatile float %add, ptr addrspace(1) undef
148  ret void
149}
150
; Baseline with no fabs/fneg at all: select of two positive constants feeding
; an fadd. The checks expect both constants as operands of the v_cndmask and
; a plain v_add_f32_e32 with no source modifiers.
151; GCN-LABEL: {{^}}add_select_posk_posk_f32:
152; GCN: buffer_load_dword [[X:v[0-9]+]]
153
154; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
155; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
156define amdgpu_kernel void @add_select_posk_posk_f32(i32 %c) #0 {
157  %x = load volatile float, ptr addrspace(1) undef
158  %cmp = icmp eq i32 %c, 0
159  %select = select i1 %cmp, float 2.0, float 1.0
160  %add = fadd float %select, %x
161  store volatile float %add, ptr addrspace(1) undef
162  ret void
163}
164
; select(-1.0, fabs x): the constant is the true operand this time. Although
; the IR compares for equality, the checks expect an s_cmp_lg_u32 whose
; s_cselect_b64 result is the condition of the v_cndmask, with -1.0 and |X|
; in the same operand positions as in add_select_fabs_negk_f32.
165; GCN-LABEL: {{^}}add_select_negk_fabs_f32:
166; GCN: buffer_load_dword [[X:v[0-9]+]]
167; GCN: buffer_load_dword [[Y:v[0-9]+]]
168
169; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
170; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
171; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, |[[X]]|, [[VCC]]
172; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
173define amdgpu_kernel void @add_select_negk_fabs_f32(i32 %c) #0 {
174  %x = load volatile float, ptr addrspace(1) undef
175  %y = load volatile float, ptr addrspace(1) undef
176  %cmp = icmp eq i32 %c, 0
177  %fabs = call float @llvm.fabs.f32(float %x)
178  %select = select i1 %cmp, float -1.0, float %fabs
179  %add = fadd float %select, %y
180  store volatile float %add, ptr addrspace(1) undef
181  ret void
182}
183
; Like add_select_negk_fabs_f32 but with -1024.0 as the constant. The checks
; expect it to be materialized by a v_mov_b32 of 0xc4800000 (the f32 bit
; pattern of -1024.0) rather than written directly as a v_cndmask operand.
184; GCN-LABEL: {{^}}add_select_negliteralk_fabs_f32:
185; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
186; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
187; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xc4800000
188
189; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
190; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
191; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], |[[X]]|, [[VCC]]
192; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
193define amdgpu_kernel void @add_select_negliteralk_fabs_f32(i32 %c) #0 {
194  %x = load volatile float, ptr addrspace(1) undef
195  %y = load volatile float, ptr addrspace(1) undef
196  %cmp = icmp eq i32 %c, 0
197  %fabs = call float @llvm.fabs.f32(float %x)
198  %select = select i1 %cmp, float -1024.0, float %fabs
199  %add = fadd float %select, %y
200  store volatile float %add, ptr addrspace(1) undef
201  ret void
202}
203
; select(fabs x, +1.0): positive-constant counterpart of
; add_select_fabs_negk_f32. The checks expect 1.0 and |X| as operands of the
; v_cndmask_b32_e64, with an SGPR pair as the condition, and a plain add.
204; GCN-LABEL: {{^}}add_select_fabs_posk_f32:
205; GCN: buffer_load_dword [[X:v[0-9]+]]
206; GCN: buffer_load_dword [[Y:v[0-9]+]]
207
208; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
209; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
210define amdgpu_kernel void @add_select_fabs_posk_f32(i32 %c) #0 {
211  %x = load volatile float, ptr addrspace(1) undef
212  %y = load volatile float, ptr addrspace(1) undef
213
214  %cmp = icmp eq i32 %c, 0
215  %fabs = call float @llvm.fabs.f32(float %x)
216  %select = select i1 %cmp, float %fabs, float 1.0
217  %add = fadd float %select, %y
218  store volatile float %add, ptr addrspace(1) undef
219  ret void
220}
221
; select(+1.0, fabs x): the constant is the true operand. Although the IR
; compares for equality, the checks expect an s_cmp_lg_u32 feeding an
; s_cselect_b64, with 1.0 and |X| as operands of the v_cndmask_b32_e64.
; The v_cndmask condition is tied to the register pair captured from
; s_cselect_b64 (as in add_select_negk_fabs_f32) so that the check verifies
; the select really consumes that compare result, not just any SGPR pair.
222; GCN-LABEL: {{^}}add_select_posk_fabs_f32:
223; GCN: buffer_load_dword [[X:v[0-9]+]]
224; GCN: buffer_load_dword [[Y:v[0-9]+]]
225
226; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
227; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
228; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, |[[X]]|, [[VCC]]
229; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
230define amdgpu_kernel void @add_select_posk_fabs_f32(i32 %c) #0 {
231  %x = load volatile float, ptr addrspace(1) undef
232  %y = load volatile float, ptr addrspace(1) undef
233  %cmp = icmp eq i32 %c, 0
234  %fabs = call float @llvm.fabs.f32(float %x)
235  %select = select i1 %cmp, float 1.0, float %fabs
236  %add = fadd float %select, %y
237  store volatile float %add, ptr addrspace(1) undef
238  ret void
239}
240
; fadd of a select whose two operands are both negations (fsub -0.0, v) of
; loaded values. The checks expect the select on the raw loads and the
; negation absorbed by turning the add into v_sub_f32 (Z - SELECT).
241; GCN-LABEL: {{^}}add_select_fneg_fneg_f32:
242; GCN: buffer_load_dword [[X:v[0-9]+]]
243; GCN: buffer_load_dword [[Y:v[0-9]+]]
244; GCN: buffer_load_dword [[Z:v[0-9]+]]
245
246; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
247; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
248define amdgpu_kernel void @add_select_fneg_fneg_f32(i32 %c) #0 {
249  %x = load volatile float, ptr addrspace(1) undef
250  %y = load volatile float, ptr addrspace(1) undef
251  %z = load volatile float, ptr addrspace(1) undef
252  %cmp = icmp eq i32 %c, 0
253  %fneg.x = fsub float -0.0, %x
254  %fneg.y = fsub float -0.0, %y
255  %select = select i1 %cmp, float %fneg.x, float %fneg.y
256  %add = fadd float %select, %z
257  store volatile float %add, ptr addrspace(1) undef
258  ret void
259}
260
; fneg/fneg select where %fneg.x (the true operand) also feeds a second fadd.
; The checks expect both adds to become v_sub_f32: Z - SELECT for the select
; user and W - X for the direct user of the negated value.
261; GCN-LABEL: {{^}}add_select_multi_use_lhs_fneg_fneg_f32:
262; GCN: buffer_load_dword [[X:v[0-9]+]]
263; GCN: buffer_load_dword [[Y:v[0-9]+]]
264; GCN: buffer_load_dword [[Z:v[0-9]+]]
265; GCN: buffer_load_dword [[W:v[0-9]+]]
266
267; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
268; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
269; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[X]]
270define amdgpu_kernel void @add_select_multi_use_lhs_fneg_fneg_f32(i32 %c) #0 {
271  %x = load volatile float, ptr addrspace(1) undef
272  %y = load volatile float, ptr addrspace(1) undef
273  %z = load volatile float, ptr addrspace(1) undef
274  %w = load volatile float, ptr addrspace(1) undef
275  %cmp = icmp eq i32 %c, 0
276  %fneg.x = fsub float -0.0, %x
277  %fneg.y = fsub float -0.0, %y
278  %select = select i1 %cmp, float %fneg.x, float %fneg.y
279  %add0 = fadd float %select, %z
280  %add1 = fadd float %fneg.x, %w
281  store volatile float %add0, ptr addrspace(1) undef
282  store volatile float %add1, ptr addrspace(1) undef
283  ret void
284}
285
; fneg/fneg select where %fneg.x is additionally stored to memory. The checks
; expect a v_xor_b32 with 0x80000000 (sign-bit flip) to materialize the
; negated value for the store, while the select/add pair is still emitted as
; a select on raw values followed by v_sub_f32.
286; GCN-LABEL: {{^}}add_select_multi_store_use_lhs_fneg_fneg_f32:
287; GCN: buffer_load_dword [[X:v[0-9]+]]
288; GCN: buffer_load_dword [[Y:v[0-9]+]]
289; GCN: buffer_load_dword [[Z:v[0-9]+]]
290
291; GCN-DAG: v_xor_b32_e32 [[NEG_X:v[0-9]+]], 0x80000000, [[X]]
292; GCN-DAG: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
293; GCN-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[Z]], [[SELECT]]
294
295; GCN: buffer_store_dword [[ADD]]
296; GCN: buffer_store_dword [[NEG_X]]
297define amdgpu_kernel void @add_select_multi_store_use_lhs_fneg_fneg_f32(i32 %c) #0 {
298  %x = load volatile float, ptr addrspace(1) undef
299  %y = load volatile float, ptr addrspace(1) undef
300  %z = load volatile float, ptr addrspace(1) undef
301  %cmp = icmp eq i32 %c, 0
302  %fneg.x = fsub float -0.0, %x
303  %fneg.y = fsub float -0.0, %y
304  %select = select i1 %cmp, float %fneg.x, float %fneg.y
305  %add0 = fadd float %select, %z
306  store volatile float %add0, ptr addrspace(1) undef
307  store volatile float %fneg.x, ptr addrspace(1) undef
308  ret void
309}
310
; Mirror of the multi-use lhs fneg test: %fneg.y (the false operand of the
; select) is the value with a second fadd user. The checks expect that extra
; add to be emitted as v_sub_f32 W - Y.
311; GCN-LABEL: {{^}}add_select_multi_use_rhs_fneg_fneg_f32:
312; GCN: buffer_load_dword [[X:v[0-9]+]]
313; GCN: buffer_load_dword [[Y:v[0-9]+]]
314; GCN: buffer_load_dword [[Z:v[0-9]+]]
315; GCN: buffer_load_dword [[W:v[0-9]+]]
316
317; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[Y]], [[X]], vcc
318; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
319; GCN-DAG: v_sub_f32_e32 v{{[0-9]+}}, [[W]], [[Y]]
320define amdgpu_kernel void @add_select_multi_use_rhs_fneg_fneg_f32(i32 %c) #0 {
321  %x = load volatile float, ptr addrspace(1) undef
322  %y = load volatile float, ptr addrspace(1) undef
323  %z = load volatile float, ptr addrspace(1) undef
324  %w = load volatile float, ptr addrspace(1) undef
325  %cmp = icmp eq i32 %c, 0
326  %fneg.x = fsub float -0.0, %x
327  %fneg.y = fsub float -0.0, %y
328  %select = select i1 %cmp, float %fneg.x, float %fneg.y
329  %add0 = fadd float %select, %z
330  %add1 = fadd float %fneg.y, %w
331  store volatile float %add0, ptr addrspace(1) undef
332  store volatile float %add1, ptr addrspace(1) undef
333  ret void
334}
335
; Only the true operand of the select is negated; the false operand is a
; plain loaded value. The checks expect the negation as a source modifier on
; the X operand of the v_cndmask_b32_e64 and a plain v_add_f32_e32.
336; GCN-LABEL: {{^}}add_select_fneg_var_f32:
337; GCN: buffer_load_dword [[X:v[0-9]+]]
338; GCN: buffer_load_dword [[Y:v[0-9]+]]
339; GCN: buffer_load_dword [[Z:v[0-9]+]]
340
341; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[Y]], -[[X]],
342; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
343define amdgpu_kernel void @add_select_fneg_var_f32(i32 %c) #0 {
344  %x = load volatile float, ptr addrspace(1) undef
345  %y = load volatile float, ptr addrspace(1) undef
346  %z = load volatile float, ptr addrspace(1) undef
347  %cmp = icmp eq i32 %c, 0
348  %fneg.x = fsub float -0.0, %x
349  %select = select i1 %cmp, float %fneg.x, float %y
350  %add = fadd float %select, %z
351  store volatile float %add, ptr addrspace(1) undef
352  ret void
353}
354
; select(fneg x, -1.0): the checks expect the negation to be pulled out of
; both operands, i.e. a select between X and +1.0 followed by v_sub_f32
; (Y - SELECT) instead of an add.
355; GCN-LABEL: {{^}}add_select_fneg_negk_f32:
356; GCN: buffer_load_dword [[X:v[0-9]+]]
357; GCN: buffer_load_dword [[Y:v[0-9]+]]
358
359; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
360; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
361define amdgpu_kernel void @add_select_fneg_negk_f32(i32 %c) #0 {
362  %x = load volatile float, ptr addrspace(1) undef
363  %y = load volatile float, ptr addrspace(1) undef
364  %cmp = icmp eq i32 %c, 0
365  %fneg.x = fsub float -0.0, %x
366  %select = select i1 %cmp, float %fneg.x, float -1.0
367  %add = fadd float %select, %y
368  store volatile float %add, ptr addrspace(1) undef
369  ret void
370}
371
; select(fneg x, K) where K is the positive constant 0x3FC45F3060000000
; (1/(2*pi) going by the test name). The checks expect the negation pulled
; out of the select, which leaves the sign-flipped constant 0xbe22f983 to be
; materialized with a v_mov_b32, followed by v_sub_f32 (Y - SELECT).
372; GCN-LABEL: {{^}}add_select_fneg_inv2pi_f32:
373; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
374; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
375
376; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
377; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
378; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
379define amdgpu_kernel void @add_select_fneg_inv2pi_f32(i32 %c) #0 {
380  %x = load volatile float, ptr addrspace(1) undef
381  %y = load volatile float, ptr addrspace(1) undef
382  %cmp = icmp eq i32 %c, 0
383  %fneg.x = fneg float %x
384  %select = select i1 %cmp, float %fneg.x, float 0x3FC45F3060000000
385  %add = fadd float %select, %y
386  store volatile float %add, ptr addrspace(1) undef
387  ret void
388}
389
; select(fneg x, -K) with the negated form of the constant used in
; add_select_fneg_inv2pi_f32. Pulling the negation out leaves the positive
; constant, and the two runs are expected to differ in how it is encoded:
; the tahiti run materializes 0x3e22f983 with a v_mov_b32, while the fiji run
; uses 0.15915494 directly as a v_cndmask operand.
390; GCN-LABEL: {{^}}add_select_fneg_neginv2pi_f32:
391; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
392; GCN-DAG: buffer_load_dword [[Y:v[0-9]+]]
393; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
394
395; SI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K]], [[X]], vcc
396; VI: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 0.15915494, [[X]], vcc
397
398; GCN: v_sub_f32_e32 v{{[0-9]+}},  [[Y]], [[SELECT]]
399define amdgpu_kernel void @add_select_fneg_neginv2pi_f32(i32 %c) #0 {
400  %x = load volatile float, ptr addrspace(1) undef
401  %y = load volatile float, ptr addrspace(1) undef
402  %cmp = icmp eq i32 %c, 0
403  %fneg.x = fneg float %x
404  %select = select i1 %cmp, float %fneg.x, float 0xBFC45F3060000000
405  %add = fadd float %select, %y
406  store volatile float %add, ptr addrspace(1) undef
407  ret void
408}
409
; Select of two negative constants with no explicit fneg in the IR. The
; checks expect the constants to stay negative on the v_cndmask and the add
; to remain a plain v_add_f32_e32 (no rewrite into a subtract).
410; GCN-LABEL: {{^}}add_select_negk_negk_f32:
411; GCN: buffer_load_dword [[X:v[0-9]+]]
412
413; GCN: s_cmp_eq_u32
414; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -1.0, -2.0, s
415; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
416define amdgpu_kernel void @add_select_negk_negk_f32(i32 %c) #0 {
417  %x = load volatile float, ptr addrspace(1) undef
418  %cmp = icmp eq i32 %c, 0
419  %select = select i1 %cmp, float -2.0, float -1.0
420  %add = fadd float %select, %x
421  store volatile float %add, ptr addrspace(1) undef
422  ret void
423}
424
; Same as add_select_negk_negk_f32 but with -2048.0 / -4096.0. The checks
; expect both to be materialized with v_mov_b32 (0xc5000000 and 0xc5800000
; are their f32 bit patterns) and selected unchanged before a plain add.
425; GCN-LABEL: {{^}}add_select_negliteralk_negliteralk_f32:
426; GCN-DAG: v_mov_b32_e32 [[K0:v[0-9]+]], 0xc5000000
427; GCN-DAG: v_mov_b32_e32 [[K1:v[0-9]+]], 0xc5800000
428; GCN-DAG: buffer_load_dword [[X:v[0-9]+]]
429
430; GCN: s_cmp_eq_u32
431; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], [[K1]], [[K0]], vcc
432; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
433define amdgpu_kernel void @add_select_negliteralk_negliteralk_f32(i32 %c) #0 {
434  %x = load volatile float, ptr addrspace(1) undef
435  %cmp = icmp eq i32 %c, 0
436  %select = select i1 %cmp, float -2048.0, float -4096.0
437  %add = fadd float %select, %x
438  store volatile float %add, ptr addrspace(1) undef
439  ret void
440}
441
; fneg applied to a select of two negative constants. The checks expect the
; negation to be folded into the constants (1.0 / 2.0 on the v_cndmask) so
; that the add stays a plain v_add_f32_e32.
442; GCN-LABEL: {{^}}add_select_fneg_negk_negk_f32:
443; GCN: buffer_load_dword [[X:v[0-9]+]]
444
445; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 1.0, 2.0, s
446; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[X]]
447define amdgpu_kernel void @add_select_fneg_negk_negk_f32(i32 %c) #0 {
448  %x = load volatile float, ptr addrspace(1) undef
449  %cmp = icmp eq i32 %c, 0
450  %select = select i1 %cmp, float -2.0, float -1.0
451  %fneg.x = fsub float -0.0, %select
452  %add = fadd float %fneg.x, %x
453  store volatile float %add, ptr addrspace(1) undef
454  ret void
455}
456
; select(-1.0, fneg x): operand-swapped version of add_select_fneg_negk_f32.
; The checks expect an s_cmp_lg_u32 / s_cselect_b64 into vcc, a select
; between X and +1.0, and the negation absorbed into v_sub_f32 (Y - SELECT).
457; GCN-LABEL: {{^}}add_select_negk_fneg_f32:
458; GCN: buffer_load_dword [[X:v[0-9]+]]
459; GCN: buffer_load_dword [[Y:v[0-9]+]]
460
461; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
462; GCN: s_cselect_b64 vcc, -1, 0
463; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 1.0, [[X]], vcc
464; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
465define amdgpu_kernel void @add_select_negk_fneg_f32(i32 %c) #0 {
466  %x = load volatile float, ptr addrspace(1) undef
467  %y = load volatile float, ptr addrspace(1) undef
468  %cmp = icmp eq i32 %c, 0
469  %fneg.x = fsub float -0.0, %x
470  %select = select i1 %cmp, float -1.0, float %fneg.x
471  %add = fadd float %select, %y
472  store volatile float %add, ptr addrspace(1) undef
473  ret void
474}
475
; select(fneg x, +1.0): the checks expect the negation pulled out of the
; select, which turns the constant into -1.0 on the v_cndmask, followed by
; v_sub_f32 (Y - SELECT).
476; GCN-LABEL: {{^}}add_select_fneg_posk_f32:
477; GCN: buffer_load_dword [[X:v[0-9]+]]
478; GCN: buffer_load_dword [[Y:v[0-9]+]]
479
480; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
481; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
482define amdgpu_kernel void @add_select_fneg_posk_f32(i32 %c) #0 {
483  %x = load volatile float, ptr addrspace(1) undef
484  %y = load volatile float, ptr addrspace(1) undef
485  %cmp = icmp eq i32 %c, 0
486  %fneg.x = fsub float -0.0, %x
487  %select = select i1 %cmp, float %fneg.x, float 1.0
488  %add = fadd float %select, %y
489  store volatile float %add, ptr addrspace(1) undef
490  ret void
491}
492
; select(+1.0, fneg x): operand-swapped version of add_select_fneg_posk_f32.
; The checks expect an s_cmp_lg_u32 / s_cselect_b64 into vcc, a select
; between X and -1.0, and v_sub_f32 (Y - SELECT).
493; GCN-LABEL: {{^}}add_select_posk_fneg_f32:
494; GCN: buffer_load_dword [[X:v[0-9]+]]
495; GCN: buffer_load_dword [[Y:v[0-9]+]]
496
497; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
498; GCN: s_cselect_b64 vcc, -1, 0
499; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], -1.0, [[X]], vcc
500; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Y]], [[SELECT]]
501define amdgpu_kernel void @add_select_posk_fneg_f32(i32 %c) #0 {
502  %x = load volatile float, ptr addrspace(1) undef
503  %y = load volatile float, ptr addrspace(1) undef
504  %cmp = icmp eq i32 %c, 0
505  %fneg.x = fsub float -0.0, %x
506  %select = select i1 %cmp, float 1.0, float %fneg.x
507  %add = fadd float %select, %y
508  store volatile float %add, ptr addrspace(1) undef
509  ret void
510}
511
; select(-fabs(x), fabs(y)): the operands carry different modifiers, so
; nothing can be hoisted past the select. The checks expect both modifiers
; (-|X| and |Y|) on the v_cndmask_b32_e64 and a plain add.
512; GCN-LABEL: {{^}}add_select_negfabs_fabs_f32:
513; GCN: buffer_load_dword [[X:v[0-9]+]]
514; GCN: buffer_load_dword [[Y:v[0-9]+]]
515; GCN: buffer_load_dword [[Z:v[0-9]+]]
516
517; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -|[[X]]|,
518; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
519define amdgpu_kernel void @add_select_negfabs_fabs_f32(i32 %c) #0 {
520  %x = load volatile float, ptr addrspace(1) undef
521  %y = load volatile float, ptr addrspace(1) undef
522  %z = load volatile float, ptr addrspace(1) undef
523  %cmp = icmp eq i32 %c, 0
524  %fabs.x = call float @llvm.fabs.f32(float %x)
525  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
526  %fabs.y = call float @llvm.fabs.f32(float %y)
527  %select = select i1 %cmp, float %fneg.fabs.x, float %fabs.y
528  %add = fadd float %select, %z
529  store volatile float %add, ptr addrspace(1) undef
530  ret void
531}
532
; select(fabs(x), -fabs(y)): mirror of add_select_negfabs_fabs_f32 with the
; negation on the false operand. The checks expect |X| and -|Y| as modifiers
; on the v_cndmask_b32_e64 and a plain add.
533; GCN-LABEL: {{^}}add_select_fabs_negfabs_f32:
534; GCN: buffer_load_dword [[X:v[0-9]+]]
535; GCN: buffer_load_dword [[Y:v[0-9]+]]
536; GCN: buffer_load_dword [[Z:v[0-9]+]]
537
538; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -|[[Y]]|, |[[X]]|,
539; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
540define amdgpu_kernel void @add_select_fabs_negfabs_f32(i32 %c) #0 {
541  %x = load volatile float, ptr addrspace(1) undef
542  %y = load volatile float, ptr addrspace(1) undef
543  %z = load volatile float, ptr addrspace(1) undef
544  %cmp = icmp eq i32 %c, 0
545  %fabs.x = call float @llvm.fabs.f32(float %x)
546  %fabs.y = call float @llvm.fabs.f32(float %y)
547  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
548  %select = select i1 %cmp, float %fabs.x, float %fneg.fabs.y
549  %add = fadd float %select, %z
550  store volatile float %add, ptr addrspace(1) undef
551  ret void
552}
553
; select(fneg(x), fabs(y)): one operand negated, the other an absolute value.
; The checks expect each modifier (-X and |Y|) to stay on its own operand of
; the v_cndmask_b32_e64, followed by a plain add.
554; GCN-LABEL: {{^}}add_select_neg_fabs_f32:
555; GCN: buffer_load_dword [[X:v[0-9]+]]
556; GCN: buffer_load_dword [[Y:v[0-9]+]]
557; GCN: buffer_load_dword [[Z:v[0-9]+]]
558
559; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, -[[X]],
560; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
561define amdgpu_kernel void @add_select_neg_fabs_f32(i32 %c) #0 {
562  %x = load volatile float, ptr addrspace(1) undef
563  %y = load volatile float, ptr addrspace(1) undef
564  %z = load volatile float, ptr addrspace(1) undef
565  %cmp = icmp eq i32 %c, 0
566  %fneg.x = fsub float -0.000000e+00, %x
567  %fabs.y = call float @llvm.fabs.f32(float %y)
568  %select = select i1 %cmp, float %fneg.x, float %fabs.y
569  %add = fadd float %select, %z
570  store volatile float %add, ptr addrspace(1) undef
571  ret void
572}
573
; select(fabs(x), fneg(y)): mirror of add_select_neg_fabs_f32. The checks
; expect |X| and -Y as modifiers on the v_cndmask_b32_e64 and a plain add.
574; GCN-LABEL: {{^}}add_select_fabs_neg_f32:
575; GCN: buffer_load_dword [[X:v[0-9]+]]
576; GCN: buffer_load_dword [[Y:v[0-9]+]]
577; GCN: buffer_load_dword [[Z:v[0-9]+]]
578
579; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -[[Y]], |[[X]]|,
580; GCN: v_add_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Z]]
581define amdgpu_kernel void @add_select_fabs_neg_f32(i32 %c) #0 {
582  %x = load volatile float, ptr addrspace(1) undef
583  %y = load volatile float, ptr addrspace(1) undef
584  %z = load volatile float, ptr addrspace(1) undef
585  %cmp = icmp eq i32 %c, 0
586  %fabs.x = call float @llvm.fabs.f32(float %x)
587  %fneg.y = fsub float -0.000000e+00, %y
588  %select = select i1 %cmp, float %fabs.x, float %fneg.y
589  %add = fadd float %select, %z
590  store volatile float %add, ptr addrspace(1) undef
591  ret void
592}
593
; select(fneg(x), -fabs(y)): both operands share an outer negation. The
; checks expect that common negation to be hoisted into v_sub_f32
; (Z - SELECT), leaving only the |Y| modifier on the v_cndmask_b32_e64.
594; GCN-LABEL: {{^}}add_select_neg_negfabs_f32:
595; GCN: buffer_load_dword [[X:v[0-9]+]]
596; GCN: buffer_load_dword [[Y:v[0-9]+]]
597; GCN: buffer_load_dword [[Z:v[0-9]+]]
598
599; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[Y]]|, [[X]],
600; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
601define amdgpu_kernel void @add_select_neg_negfabs_f32(i32 %c) #0 {
602  %x = load volatile float, ptr addrspace(1) undef
603  %y = load volatile float, ptr addrspace(1) undef
604  %z = load volatile float, ptr addrspace(1) undef
605  %cmp = icmp eq i32 %c, 0
606  %fneg.x = fsub float -0.000000e+00, %x
607  %fabs.y = call float @llvm.fabs.f32(float %y)
608  %fneg.fabs.y = fsub float -0.000000e+00, %fabs.y
609  %select = select i1 %cmp, float %fneg.x, float %fneg.fabs.y
610  %add = fadd float %select, %z
611  store volatile float %add, ptr addrspace(1) undef
612  ret void
613}
614
; select(fneg(y), -fabs(x)): like add_select_neg_negfabs_f32, but note the
; true operand here is derived from %y and the false operand from %x. The
; checks expect the shared negation hoisted into v_sub_f32 (Z - SELECT) and
; only |X| left as a modifier on the v_cndmask_b32_e64.
615; GCN-LABEL: {{^}}add_select_negfabs_neg_f32:
616; GCN: buffer_load_dword [[X:v[0-9]+]]
617; GCN: buffer_load_dword [[Y:v[0-9]+]]
618; GCN: buffer_load_dword [[Z:v[0-9]+]]
619
620; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], |[[X]]|, [[Y]],
621; GCN: v_sub_f32_e32 v{{[0-9]+}}, [[Z]], [[SELECT]]
622define amdgpu_kernel void @add_select_negfabs_neg_f32(i32 %c) #0 {
623  %x = load volatile float, ptr addrspace(1) undef
624  %y = load volatile float, ptr addrspace(1) undef
625  %z = load volatile float, ptr addrspace(1) undef
626  %cmp = icmp eq i32 %c, 0
627  %fabs.x = call float @llvm.fabs.f32(float %x)
628  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
629  %fneg.y = fsub float -0.000000e+00, %y
630  %select = select i1 %cmp, float %fneg.y, float %fneg.fabs.x
631  %add = fadd float %select, %z
632  store volatile float %add, ptr addrspace(1) undef
633  ret void
634}
635
; fmul user instead of fadd: select(-fabs(x), +4.0). The checks expect
; s_cmp_eq_u32 / s_cselect_b64, the -|X| modifier and the constant 4.0 kept
; on the v_cndmask_b32_e64, and a plain v_mul_f32_e32.
; (The IR result is named %add although the operation is an fmul.)
636; GCN-LABEL: {{^}}mul_select_negfabs_posk_f32:
637; GCN: buffer_load_dword [[X:v[0-9]+]]
638; GCN: buffer_load_dword [[Y:v[0-9]+]]
639
640; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
641; GCN: s_cselect_b64  [[VCC:.*]], -1, 0
642; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
643; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
644define amdgpu_kernel void @mul_select_negfabs_posk_f32(i32 %c) #0 {
645  %x = load volatile float, ptr addrspace(1) undef
646  %y = load volatile float, ptr addrspace(1) undef
647  %cmp = icmp eq i32 %c, 0
648  %fabs.x = call float @llvm.fabs.f32(float %x)
649  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
650  %select = select i1 %cmp, float %fneg.fabs.x, float 4.0
651  %add = fmul float %select, %y
652  store volatile float %add, ptr addrspace(1) undef
653  ret void
654}
655
; Operand-swapped version of mul_select_negfabs_posk_f32:
; select(+4.0, -fabs(x)). The checks expect the compare emitted as
; s_cmp_lg_u32 and the same v_cndmask operands (4.0, -|X|), then v_mul_f32.
656; GCN-LABEL: {{^}}mul_select_posk_negfabs_f32:
657; GCN: buffer_load_dword [[X:v[0-9]+]]
658; GCN: buffer_load_dword [[Y:v[0-9]+]]
659
660; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
661; GCN: s_cselect_b64  [[VCC:.*]], -1, 0
662; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 4.0, -|[[X]]|, [[VCC]]
663; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
664define amdgpu_kernel void @mul_select_posk_negfabs_f32(i32 %c) #0 {
665  %x = load volatile float, ptr addrspace(1) undef
666  %y = load volatile float, ptr addrspace(1) undef
667  %cmp = icmp eq i32 %c, 0
668  %fabs.x = call float @llvm.fabs.f32(float %x)
669  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
670  %select = select i1 %cmp, float 4.0, float %fneg.fabs.x
671  %add = fmul float %select, %y
672  store volatile float %add, ptr addrspace(1) undef
673  ret void
674}
675
; select(-fabs(x), -4.0) feeding an fmul. The checks expect both the -|X|
; modifier and the negative constant to remain on the v_cndmask_b32_e64
; (condition in an SGPR pair), followed by a plain v_mul_f32_e32.
676; GCN-LABEL: {{^}}mul_select_negfabs_negk_f32:
677; GCN: buffer_load_dword [[X:v[0-9]+]]
678; GCN: buffer_load_dword [[Y:v[0-9]+]]
679
680; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
681; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
682define amdgpu_kernel void @mul_select_negfabs_negk_f32(i32 %c) #0 {
683  %x = load volatile float, ptr addrspace(1) undef
684  %y = load volatile float, ptr addrspace(1) undef
685  %cmp = icmp eq i32 %c, 0
686  %fabs.x = call float @llvm.fabs.f32(float %x)
687  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
688  %select = select i1 %cmp, float %fneg.fabs.x, float -4.0
689  %add = fmul float %select, %y
690  store volatile float %add, ptr addrspace(1) undef
691  ret void
692}
693
; Operand-swapped version of mul_select_negfabs_negk_f32:
; select(-4.0, -fabs(x)). The checks expect s_cmp_lg_u32 / s_cselect_b64 and
; the same v_cndmask operands (-4.0, -|X|), followed by v_mul_f32_e32.
694; GCN-LABEL: {{^}}mul_select_negk_negfabs_f32:
695; GCN: buffer_load_dword [[X:v[0-9]+]]
696; GCN: buffer_load_dword [[Y:v[0-9]+]]
697
698; GCN: s_cmp_lg_u32
699; GCN: s_cselect_b64 [[VCC:.*]], -1, 0
700; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], -4.0, -|[[X]]|, [[VCC]]
701; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
702define amdgpu_kernel void @mul_select_negk_negfabs_f32(i32 %c) #0 {
703  %x = load volatile float, ptr addrspace(1) undef
704  %y = load volatile float, ptr addrspace(1) undef
705  %cmp = icmp eq i32 %c, 0
706  %fabs.x = call float @llvm.fabs.f32(float %x)
707  %fneg.fabs.x = fsub float -0.000000e+00, %fabs.x
708  %select = select i1 %cmp, float -4.0, float %fneg.fabs.x
709  %add = fmul float %select, %y
710  store volatile float %add, ptr addrspace(1) undef
711  ret void
712}
713
714; --------------------------------------------------------------------------------
715; Don't fold if fneg can fold into the source
716; --------------------------------------------------------------------------------
717
; The negated select operand is itself the result of an fadd (x + 4.0), and
; the select result is stored directly. The checks expect the negation to be
; folded into the source operation (v_sub_f32 -4.0 - X) so that the select
; keeps the constant 2.0 and is immediately followed by the store.
; %y is loaded only to produce the second buffer_load; it has no other use.
718; GCN-LABEL: {{^}}select_fneg_posk_src_add_f32:
719; GCN: buffer_load_dword [[X:v[0-9]+]]
720; GCN: buffer_load_dword [[Y:v[0-9]+]]
721
722; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], -4.0, [[X]]
723; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
724; GCN-NEXT: buffer_store_dword [[SELECT]]
725define amdgpu_kernel void @select_fneg_posk_src_add_f32(i32 %c) #0 {
726  %x = load volatile float, ptr addrspace(1) undef
727  %y = load volatile float, ptr addrspace(1) undef
728  %cmp = icmp eq i32 %c, 0
729  %add = fadd float %x, 4.0
730  %fneg = fsub float -0.0, %add
731  %select = select i1 %cmp, float %fneg, float 2.0
732  store volatile float %select, ptr addrspace(1) undef
733  ret void
734}
735
; As select_fneg_posk_src_add_f32, but the negated value comes from an fsub
; (x - 4.0). The checks expect the negation folded into the subtract by
; swapping its sense (v_sub_f32 4.0 - X), then select and store back to back.
; (The IR result is named %add although the operation is an fsub.)
736; GCN-LABEL: {{^}}select_fneg_posk_src_sub_f32:
737; GCN: buffer_load_dword [[X:v[0-9]+]]
738
739; GCN: v_sub_f32_e32 [[ADD:v[0-9]+]], 4.0, [[X]]
740; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[ADD]], vcc
741; GCN-NEXT: buffer_store_dword [[SELECT]]
742define amdgpu_kernel void @select_fneg_posk_src_sub_f32(i32 %c) #0 {
743  %x = load volatile float, ptr addrspace(1) undef
744  %cmp = icmp eq i32 %c, 0
745  %add = fsub float %x, 4.0
746  %fneg = fsub float -0.0, %add
747  %select = select i1 %cmp, float %fneg, float 2.0
748  store volatile float %select, ptr addrspace(1) undef
749  ret void
750}
751
; The negated select operand comes from an fmul (x * 4.0). The checks expect
; the negation folded into the multiply's constant (v_mul_f32 -4.0, X), then
; the select with 2.0 immediately followed by the store.
752; GCN-LABEL: {{^}}select_fneg_posk_src_mul_f32:
753; GCN: buffer_load_dword [[X:v[0-9]+]]
754
755; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[X]]
756; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[MUL]], vcc
757; GCN-NEXT: buffer_store_dword [[SELECT]]
758define amdgpu_kernel void @select_fneg_posk_src_mul_f32(i32 %c) #0 {
759  %x = load volatile float, ptr addrspace(1) undef
760  %cmp = icmp eq i32 %c, 0
761  %mul = fmul float %x, 4.0
762  %fneg = fsub float -0.0, %mul
763  %select = select i1 %cmp, float %fneg, float 2.0
764  store volatile float %select, ptr addrspace(1) undef
765  ret void
766}
767
; The negated select operand comes from llvm.fma(x, 4.0, z). The checks
; expect the negation distributed into the fma (constant becomes -4.0 and the
; addend gets a neg modifier: v_fma_f32 X, -4.0, -Z), then select and store.
768; GCN-LABEL: {{^}}select_fneg_posk_src_fma_f32:
769; GCN: buffer_load_dword [[X:v[0-9]+]]
770; GCN: buffer_load_dword [[Z:v[0-9]+]]
771
772; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[X]], -4.0, -[[Z]]
773; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[FMA]], vcc
774; GCN-NEXT: buffer_store_dword [[SELECT]]
775define amdgpu_kernel void @select_fneg_posk_src_fma_f32(i32 %c) #0 {
776  %x = load volatile float, ptr addrspace(1) undef
777  %z = load volatile float, ptr addrspace(1) undef
778  %cmp = icmp eq i32 %c, 0
779  %fma = call float @llvm.fma.f32(float %x, float 4.0, float %z)
780  %fneg = fsub float -0.0, %fma
781  %select = select i1 %cmp, float %fneg, float 2.0
782  store volatile float %select, ptr addrspace(1) undef
783  ret void
784}
785
; select(c == 0, -fmuladd(x, 4.0, z), 2.0).
; NOTE(review): the v_cndmask expectation names [[X]] as its vector operand and
; nothing checks the multiply-add instruction(s). Presumably the result is
; written back into the register that held x - confirm against actual output
; before tightening these checks.
; GCN-LABEL: {{^}}select_fneg_posk_src_fmad_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Z:v[0-9]+]]

; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[X]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_fmad_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %addend = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %muladd = call float @llvm.fmuladd.f32(float %val, float 4.0, float %addend)
  ; The negation is spelled as (-0.0 - muladd).
  %neg.muladd = fsub float -0.0, %muladd
  %sel = select i1 %is.zero, float %neg.muladd, float 2.0
  store volatile float %sel, ptr addrspace(1) undef
  ret void
}
802
; FIXME: This one should fold to rcp
; NOTE(review): the expectations below already require v_rcp_f32_e64 with a
; negated source operand and store the select result directly, so the FIXME
; above looks stale - confirm and drop it if so.
;
; select(c == 0, -rcp(x), 2.0).
; GCN-LABEL: {{^}}select_fneg_posk_src_rcp_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]

; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[X]]
; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 2.0, [[RCP]], vcc
; GCN-NEXT: buffer_store_dword [[SELECT]]
define amdgpu_kernel void @select_fneg_posk_src_rcp_f32(i32 %c) #0 {
  ; Two volatile loads are issued; only the first value feeds the rcp.
  %val = load volatile float, ptr addrspace(1) undef
  %unused = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %recip = call float @llvm.amdgcn.rcp.f32(float %val)
  ; The negation is spelled as (-0.0 - recip).
  %neg.recip = fsub float -0.0, %recip
  %sel = select i1 %is.zero, float %neg.recip, float 2.0
  store volatile float %sel, ptr addrspace(1) undef
  ret void
}
820
; select(c == 0, -|x|, K) * y, where K is the positive constant that the test
; name calls inv2pi.
; The tahiti run expects K materialised through v_mov_b32 (0x3e22f983); the
; fiji run expects it as the operand 0.15915494 directly on the v_cndmask.
; In both runs the -|x| modifiers sit on the v_cndmask source.
; GCN-LABEL: {{^}}mul_select_negfabs_posk_inv2pi_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN-DAG: s_cselect_b64  [[VCC:.*]], -1, 0

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]]
; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]

; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_posk_inv2pi_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %scale = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %abs.val = call float @llvm.fabs.f32(float %val)
  %neg.abs.val = fneg float %abs.val
  ; -|x| is the true arm, the constant is the false arm.
  %sel = select i1 %is.zero, float %neg.abs.val, float 0x3FC45F3060000000
  %mul = fmul float %sel, %scale
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
845
; select(c == 0, K, -|x|) * y: the select arms are swapped relative to
; mul_select_negfabs_posk_inv2pi_f32.
; The expectations below require the scalar compare to be s_cmp_lg_u32 while
; the v_cndmask keeps the constant first and -|x| second.
; GCN-LABEL: {{^}}mul_select_posk_inv2pi_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0

; GCN-DAG: s_cselect_b64  [[VCC:.*]], -1, 0

; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e22f983
; SI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[VCC]]
; SI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]

; VI: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0.15915494, -|[[X]]|, [[VCC]]
; VI: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_posk_inv2pi_negfabs_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %scale = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %abs.val = call float @llvm.fabs.f32(float %val)
  %neg.abs.val = fneg float %abs.val
  ; The constant is the true arm, -|x| is the false arm.
  %sel = select i1 %is.zero, float 0x3FC45F3060000000, float %neg.abs.val
  %mul = fmul float %sel, %scale
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
872
; select(c == 0, -|x|, K) * y with a negative constant K.
; Both runs are expected to materialise K with v_mov_b32 (0xbe22f983) and to
; keep the -|x| modifiers on the v_cndmask source; the condition operand is
; only required to start with "s".
; GCN-LABEL: {{^}}mul_select_negfabs_negk_inv2pi_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, s
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_inv2pi_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %scale = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %abs.val = call float @llvm.fabs.f32(float %val)
  %neg.abs.val = fneg float %abs.val
  ; -|x| is the true arm, the negative constant is the false arm.
  %sel = select i1 %is.zero, float %neg.abs.val, float 0xBFC45F3060000000
  %mul = fmul float %sel, %scale
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
890
; select(c == 0, K, -|x|) * y with a negative constant K in the true arm.
; Both runs are expected to materialise K with v_mov_b32 (0xbe22f983), emit
; s_cmp_lg_u32 / s_cselect_b64, and keep the -|x| modifiers on the v_cndmask
; source.
; The SGPR pair written by s_cselect_b64 is captured and reused on the
; v_cndmask, so the test checks the data flow without depending on which
; registers the allocator happens to pick.
; GCN-LABEL: {{^}}mul_select_negk_inv2pi_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xbe22f983
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[COND:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[K]], -|[[X]]|, [[COND]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negk_inv2pi_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fneg float %fabs.x
  ; The negative constant is the true arm, -|x| is the false arm.
  %select = select i1 %cmp, float 0xBFC45F3060000000, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
911
; select(c == 0, -|x|, 0.0) * y.
; The expectations below require the zero to appear as the literal operand 0
; on the v_cndmask, with the -|x| modifiers on the other source.
; GCN-LABEL: {{^}}mul_select_negfabs_posk_0_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]
; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64  [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_posk_0_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %scale = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %abs.val = call float @llvm.fabs.f32(float %val)
  %neg.abs.val = fneg float %abs.val
  ; -|x| is the true arm, +0.0 is the false arm.
  %sel = select i1 %is.zero, float %neg.abs.val, float 0.0
  %mul = fmul float %sel, %scale
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
930
931
; select(c == 0, 0.0, -|x|) * y: the select arms are swapped relative to
; mul_select_negfabs_posk_0_f32.
; The expectations below require s_cmp_lg_u32 for the compare while the
; v_cndmask keeps the literal 0 first and -|x| second.
; GCN-LABEL: {{^}}mul_select_posk_0_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN-DAG: s_cmp_lg_u32 s{{[0-9]+}}, 0
; GCN: s_cselect_b64  [[VCC:.*]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, -|[[X]]|, [[VCC]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_posk_0_negfabs_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %scale = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %abs.val = call float @llvm.fabs.f32(float %val)
  %neg.abs.val = fneg float %abs.val
  ; +0.0 is the true arm, -|x| is the false arm.
  %sel = select i1 %is.zero, float 0.0, float %neg.abs.val
  %mul = fmul float %sel, %scale
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
951
; select(c == 0, -|x|, -0.0) * y.
; The expectations below require the -0.0 operand to be produced by
; v_bfrev_b32 with source 1 (captured as NEG0) and the -|x| modifiers to stay
; on the v_cndmask source; the condition is any SGPR pair.
; GCN-LABEL: {{^}}mul_select_negfabs_negk_0_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, s{{\[[0-9]+:[0-9]+\]}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negfabs_negk_0_f32(i32 %c) #0 {
  %val = load volatile float, ptr addrspace(1) undef
  %scale = load volatile float, ptr addrspace(1) undef
  %is.zero = icmp eq i32 %c, 0
  %abs.val = call float @llvm.fabs.f32(float %val)
  %neg.abs.val = fneg float %abs.val
  ; -|x| is the true arm, -0.0 is the false arm.
  %sel = select i1 %is.zero, float %neg.abs.val, float -0.0
  %mul = fmul float %sel, %scale
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
970
; select(c == 0, -0.0, -|x|) * y with -0.0 in the true arm.
; The expectations below require the -0.0 operand to be produced by
; v_bfrev_b32 with source 1 (captured as NEG0), the compare to be
; s_cmp_lg_u32 / s_cselect_b64, and the -|x| modifiers to stay on the
; v_cndmask source.
; The SGPR pair written by s_cselect_b64 is captured and reused on the
; v_cndmask, so the test checks the data flow without depending on which
; registers the allocator happens to pick.
; GCN-LABEL: {{^}}mul_select_negk_0_negfabs_f32:
; GCN: buffer_load_dword [[X:v[0-9]+]]
; GCN: buffer_load_dword [[Y:v[0-9]+]]

; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[COND:s\[[0-9]+:[0-9]+\]]], -1, 0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], [[NEG0]], -|[[X]]|, [[COND]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, [[SELECT]], [[Y]]
define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
  %x = load volatile float, ptr addrspace(1) undef
  %y = load volatile float, ptr addrspace(1) undef
  %cmp = icmp eq i32 %c, 0
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fneg.fabs.x = fneg float %fabs.x
  ; -0.0 is the true arm, -|x| is the false arm.
  %select = select i1 %cmp, float -0.0, float %fneg.fabs.x
  %mul = fmul float %select, %y
  store volatile float %mul, ptr addrspace(1) undef
  ret void
}
991
992
; Target-independent floating-point intrinsics; all carry attribute group #1.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1

; AMDGPU-specific intrinsics; these also carry attribute group #1.
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1

; #0 is attached to the kernel definitions, #1 to the intrinsic declarations.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
1002