xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll (revision 6548b6354d1d990e1c98736f5e7c3de876bedc8e)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
3; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s
4
5; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
6; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s
7
8; --------------------------------------------------------------------------------
9; fadd tests
10; --------------------------------------------------------------------------------
11
; fneg(fadd a, b), where the fadd has no other use.
; Without -enable-no-signed-zeros-fp-math the expected code keeps the add and
; flips the sign bit with a separate xor of 0x80000000. With the flag, the
; negation is expected to fold into a single subtract computing (-a) - b.
12define float @v_fneg_add_f32(float %a, float %b) #0 {
13; GCN-SAFE-LABEL: v_fneg_add_f32:
14; GCN-SAFE:       ; %bb.0:
15; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16; GCN-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
17; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
18; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
19;
20; GCN-NSZ-LABEL: v_fneg_add_f32:
21; GCN-NSZ:       ; %bb.0:
22; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v0, v1
24; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
25  %add = fadd float %a, %b
26  %fneg = fneg float %add
27  ret float %fneg
28}
29
; fneg(fadd a, b) where the un-negated sum is returned alongside the negated
; value. All four configurations share one expected sequence: the add is kept
; and the negated result is produced by an xor of the sign bit.
30define { float, float } @v_fneg_add_store_use_add_f32(float %a, float %b) #0 {
31; GCN-LABEL: v_fneg_add_store_use_add_f32:
32; GCN:       ; %bb.0:
33; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GCN-NEXT:    v_add_f32_e32 v1, v0, v1
35; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
36; GCN-NEXT:    s_setpc_b64 s[30:31]
37  %add = fadd float %a, %b
38  %fneg = fneg float %add
39  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
40  %insert.1 = insertvalue { float, float } %insert.0, float %add, 1
41  ret { float, float } %insert.1
42}
43
; fneg(fadd a, b) where the sum has a second arithmetic use (fmul by 4.0).
; Default runs expect: add, xor of the sign bit, multiply of the sum by 4.0.
; No-signed-zeros runs expect the negated sum to be computed directly as
; (-a) - b, with the multiply rewritten to use that value and the constant -4.0.
44define { float, float } @v_fneg_add_multi_use_add_f32(float %a, float %b) #0 {
45; GCN-SAFE-LABEL: v_fneg_add_multi_use_add_f32:
46; GCN-SAFE:       ; %bb.0:
47; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
48; GCN-SAFE-NEXT:    v_add_f32_e32 v1, v0, v1
49; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
50; GCN-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
51; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
52;
53; GCN-NSZ-LABEL: v_fneg_add_multi_use_add_f32:
54; GCN-NSZ:       ; %bb.0:
55; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v0, v1
57; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
58; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
59  %add = fadd float %a, %b
60  %fneg = fneg float %add
61  %use1 = fmul float %add, 4.0
62
63  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
64  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
65  ret { float, float } %insert.1
66}
67
; fneg(fadd (fneg a), b).
; Default runs expect b - a followed by an xor of the sign bit; the
; no-signed-zeros runs expect the single subtraction a - b.
68define float @v_fneg_add_fneg_x_f32(float %a, float %b) #0 {
69; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f32:
70; GCN-SAFE:       ; %bb.0:
71; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, v1, v0
73; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
74; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
75;
76; GCN-NSZ-LABEL: v_fneg_add_fneg_x_f32:
77; GCN-NSZ:       ; %bb.0:
78; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GCN-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
80; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
81  %fneg.a = fneg float %a
82  %add = fadd float %fneg.a, %b
83  %fneg = fneg float %add
84  ret float %fneg
85}
86
; fneg(fadd a, (fneg b)) -- the mirror image of the fneg-on-first-operand case.
; Default runs expect a - b followed by an xor of the sign bit; the
; no-signed-zeros runs expect the single subtraction b - a.
87define float @v_fneg_add_x_fneg_f32(float %a, float %b) #0 {
88; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f32:
89; GCN-SAFE:       ; %bb.0:
90; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, v0, v1
92; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
93; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
94;
95; GCN-NSZ-LABEL: v_fneg_add_x_fneg_f32:
96; GCN-NSZ:       ; %bb.0:
97; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98; GCN-NSZ-NEXT:    v_sub_f32_e32 v0, v1, v0
99; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
100  %fneg.b = fneg float %b
101  %add = fadd float %a, %fneg.b
102  %fneg = fneg float %add
103  ret float %fneg
104}
105
; fneg(fadd (fneg a), (fneg b)).
; Default runs expect (-a) - b followed by an xor of the sign bit; the
; no-signed-zeros runs expect all three negations to cancel into a plain a + b.
106define float @v_fneg_add_fneg_fneg_f32(float %a, float %b) #0 {
107; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f32:
108; GCN-SAFE:       ; %bb.0:
109; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; GCN-SAFE-NEXT:    v_sub_f32_e64 v0, -v0, v1
111; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
112; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
113;
114; GCN-NSZ-LABEL: v_fneg_add_fneg_fneg_f32:
115; GCN-NSZ:       ; %bb.0:
116; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GCN-NSZ-NEXT:    v_add_f32_e32 v0, v0, v1
118; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
119  %fneg.a = fneg float %a
120  %fneg.b = fneg float %b
121  %add = fadd float %fneg.a, %fneg.b
122  %fneg = fneg float %add
123  ret float %fneg
124}
125
; fneg(fadd (fneg a), b) where the inner (fneg a) is also returned.
; Both kinds of run expect an xor that materializes -a for the second result.
; Default runs additionally expect b - a followed by an xor of the sign bit;
; the no-signed-zeros runs expect the single subtraction a - b instead.
126define { float, float } @v_fneg_add_store_use_fneg_x_f32(float %a, float %b) #0 {
127; GCN-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f32:
128; GCN-SAFE:       ; %bb.0:
129; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130; GCN-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
131; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, v1, v0
132; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
133; GCN-SAFE-NEXT:    v_mov_b32_e32 v1, v2
134; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
135;
136; GCN-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f32:
137; GCN-NSZ:       ; %bb.0:
138; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GCN-NSZ-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
140; GCN-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
141; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, v2
142; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
143  %fneg.a = fneg float %a
144  %add = fadd float %fneg.a, %b
145  %fneg = fneg float %add
146  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
147  %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
148  ret { float, float } %insert.1
149}
150
; fneg(fadd (fneg a), b) where (fneg a) has a second arithmetic use
; (fmul with %c). In both kinds of run the multiply is expected to consume the
; negation as a negated operand (-v0) rather than through a separate xor.
; Default runs expect b - a followed by an xor of the sign bit; the
; no-signed-zeros runs expect the single subtraction a - b.
151define { float, float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
152; GCN-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f32:
153; GCN-SAFE:       ; %bb.0:
154; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155; GCN-SAFE-NEXT:    v_sub_f32_e32 v1, v1, v0
156; GCN-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
157; GCN-SAFE-NEXT:    v_mul_f32_e64 v1, -v0, v2
158; GCN-SAFE-NEXT:    v_mov_b32_e32 v0, v3
159; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
160;
161; GCN-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f32:
162; GCN-NSZ:       ; %bb.0:
163; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164; GCN-NSZ-NEXT:    v_sub_f32_e32 v3, v0, v1
165; GCN-NSZ-NEXT:    v_mul_f32_e64 v1, -v0, v2
166; GCN-NSZ-NEXT:    v_mov_b32_e32 v0, v3
167; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
168  %fneg.a = fneg float %a
169  %add = fadd float %fneg.a, %b
170  %fneg = fneg float %add
171  %use1 = fmul float %fneg.a, %c
172
173  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
174  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
175  ret { float, float } %insert.1
176}
177
178; This one asserted with -enable-no-signed-zeros-fp-math
; amdgpu_ps function: 1.0 / %tmp6 is multiplied by 0.0 twice (the second fmul
; carries reassoc/nnan/arcp/contract flags), 0.0 is added, and the sum feeds
; both an unordered compare against %tmp2 and an fneg that is one arm of a
; select. The final select returns 0.0 or a quiet NaN (0x7fc00000 in the
; expected output).
; Each of the four configurations has its own expected sequence. The default
; runs expect the multiply and the add of 0.0 as one v_mad_f32; the
; no-signed-zeros runs expect only a v_mul_f32 by 0.
179define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #0 {
180; SI-SAFE-LABEL: fneg_fadd_0_f32:
181; SI-SAFE:       ; %bb.0: ; %.entry
182; SI-SAFE-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
183; SI-SAFE-NEXT:    v_rcp_f32_e32 v1, v0
184; SI-SAFE-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
185; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
186; SI-SAFE-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
187; SI-SAFE-NEXT:    v_fma_f32 v1, v3, v1, v1
188; SI-SAFE-NEXT:    v_mul_f32_e32 v3, v2, v1
189; SI-SAFE-NEXT:    v_fma_f32 v4, -v0, v3, v2
190; SI-SAFE-NEXT:    v_fma_f32 v3, v4, v1, v3
191; SI-SAFE-NEXT:    v_fma_f32 v0, -v0, v3, v2
192; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
193; SI-SAFE-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
194; SI-SAFE-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
195; SI-SAFE-NEXT:    v_mad_f32 v0, v0, 0, 0
196; SI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
197; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
198; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
199; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
200; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
201; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
202; SI-SAFE-NEXT:    ; return to shader part epilog
203;
204; SI-NSZ-LABEL: fneg_fadd_0_f32:
205; SI-NSZ:       ; %bb.0: ; %.entry
206; SI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
207; SI-NSZ-NEXT:    v_rcp_f32_e32 v1, v0
208; SI-NSZ-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
209; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
210; SI-NSZ-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
211; SI-NSZ-NEXT:    v_fma_f32 v1, v3, v1, v1
212; SI-NSZ-NEXT:    v_mul_f32_e32 v3, v2, v1
213; SI-NSZ-NEXT:    v_fma_f32 v4, -v0, v3, v2
214; SI-NSZ-NEXT:    v_fma_f32 v3, v4, v1, v3
215; SI-NSZ-NEXT:    v_fma_f32 v0, -v0, v3, v2
216; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
217; SI-NSZ-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
218; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
219; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
220; SI-NSZ-NEXT:    v_mov_b32_e32 v1, s0
221; SI-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
222; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
223; SI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
224; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
225; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
226; SI-NSZ-NEXT:    ; return to shader part epilog
227;
228; VI-SAFE-LABEL: fneg_fadd_0_f32:
229; VI-SAFE:       ; %bb.0: ; %.entry
230; VI-SAFE-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
231; VI-SAFE-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
232; VI-SAFE-NEXT:    v_rcp_f32_e32 v2, v0
233; VI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
234; VI-SAFE-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
235; VI-SAFE-NEXT:    v_fma_f32 v2, v3, v2, v2
236; VI-SAFE-NEXT:    v_mul_f32_e32 v3, v1, v2
237; VI-SAFE-NEXT:    v_fma_f32 v4, -v0, v3, v1
238; VI-SAFE-NEXT:    v_fma_f32 v3, v4, v2, v3
239; VI-SAFE-NEXT:    v_fma_f32 v0, -v0, v3, v1
240; VI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
241; VI-SAFE-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
242; VI-SAFE-NEXT:    v_mov_b32_e32 v2, s0
243; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
244; VI-SAFE-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
245; VI-SAFE-NEXT:    v_mad_f32 v0, v0, 0, 0
246; VI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
247; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
248; VI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
249; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
250; VI-SAFE-NEXT:    ; return to shader part epilog
251;
252; VI-NSZ-LABEL: fneg_fadd_0_f32:
253; VI-NSZ:       ; %bb.0: ; %.entry
254; VI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
255; VI-NSZ-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
256; VI-NSZ-NEXT:    v_rcp_f32_e32 v2, v0
257; VI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
258; VI-NSZ-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
259; VI-NSZ-NEXT:    v_fma_f32 v2, v3, v2, v2
260; VI-NSZ-NEXT:    v_mul_f32_e32 v3, v1, v2
261; VI-NSZ-NEXT:    v_fma_f32 v4, -v0, v3, v1
262; VI-NSZ-NEXT:    v_fma_f32 v3, v4, v2, v3
263; VI-NSZ-NEXT:    v_fma_f32 v0, -v0, v3, v1
264; VI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
265; VI-NSZ-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
266; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s0
267; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
268; VI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
269; VI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
270; VI-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
271; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v2, vcc
272; VI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
273; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
274; VI-NSZ-NEXT:    ; return to shader part epilog
275.entry:
276  %tmp7 = fdiv float 1.000000e+00, %tmp6
277  %tmp8 = fmul float 0.000000e+00, %tmp7
278  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
279  %.i188 = fadd float %tmp9, 0.000000e+00
280  %tmp10 = fcmp uge float %.i188, %tmp2
281  %tmp11 = fneg float %.i188
282  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
283  %tmp12 = fcmp ule float %.i092, 0.000000e+00
284  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
285  ret float %.i198
286}
287
288; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
289; function attribute unsafe-fp-math automatically. Combine with the previous test
290; when that is done.
; Same instruction chain as @fneg_fadd_0_f32, with two differences visible
; here: the fdiv carries the afn flag, and the function uses attribute group #2
; (defined outside this part of the file) instead of #0.
; The no-signed-zeros runs share one expected sequence built on v_rcp_f32.
; The two default runs differ from each other: the hawaii one expects no
; reciprocal at all, while the fiji one expects v_rcp_f32 followed by a
; multiply and an add of 0.
291define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 {
292; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
293; SI-SAFE:       ; %bb.0: ; %.entry
294; SI-SAFE-NEXT:    v_min_legacy_f32_e64 v0, 0, s0
295; SI-SAFE-NEXT:    s_brev_b32 s0, 1
296; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
297; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
298; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
299; SI-SAFE-NEXT:    ; return to shader part epilog
300;
301; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32:
302; GCN-NSZ:       ; %bb.0: ; %.entry
303; GCN-NSZ-NEXT:    v_rcp_f32_e32 v0, s1
304; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, s0
305; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
306; GCN-NSZ-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
307; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
308; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
309; GCN-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
310; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
311; GCN-NSZ-NEXT:    ; return to shader part epilog
312;
313; VI-SAFE-LABEL: fneg_fadd_0_nsz_f32:
314; VI-SAFE:       ; %bb.0: ; %.entry
315; VI-SAFE-NEXT:    v_rcp_f32_e32 v0, s1
316; VI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
317; VI-SAFE-NEXT:    v_mul_f32_e32 v0, 0, v0
318; VI-SAFE-NEXT:    v_add_f32_e32 v0, 0, v0
319; VI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
320; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
321; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
322; VI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
323; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
324; VI-SAFE-NEXT:    ; return to shader part epilog
325.entry:
326  %tmp7 = fdiv afn float 1.000000e+00, %tmp6
327  %tmp8 = fmul float 0.000000e+00, %tmp7
328  %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8
329  %.i188 = fadd float %tmp9, 0.000000e+00
330  %tmp10 = fcmp uge float %.i188, %tmp2
331  %tmp11 = fneg float %.i188
332  %.i092 = select i1 %tmp10, float %tmp2, float %tmp11
333  %tmp12 = fcmp ule float %.i092, 0.000000e+00
334  %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000
335  ret float %.i198
336}
337
; Double-precision fneg(fadd a, b), where the fadd has no other use.
; Default runs expect v_add_f64 followed by an xor of the sign bit in the high
; 32-bit half (v1). No-signed-zeros runs expect a single v_add_f64 with both
; operands negated.
338define double @v_fneg_add_f64(double %a, double %b) #0 {
339; GCN-SAFE-LABEL: v_fneg_add_f64:
340; GCN-SAFE:       ; %bb.0:
341; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342; GCN-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
343; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
344; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
345;
346; GCN-NSZ-LABEL: v_fneg_add_f64:
347; GCN-NSZ:       ; %bb.0:
348; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
349; GCN-NSZ-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
350; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
351  %add = fadd double %a, %b
352  %fneg = fneg double %add
353  ret double %fneg
354}
355
; Double-precision fneg(fadd a, b) where the un-negated sum is also returned.
; All four configurations share one expected sequence: the add is kept, the
; negated value's high half comes from an xor of the sign bit, and its low half
; is a plain copy.
356define { double, double } @v_fneg_add_store_use_add_f64(double %a, double %b) #0 {
357; GCN-LABEL: v_fneg_add_store_use_add_f64:
358; GCN:       ; %bb.0:
359; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360; GCN-NEXT:    v_add_f64 v[2:3], v[0:1], v[2:3]
361; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
362; GCN-NEXT:    v_mov_b32_e32 v0, v2
363; GCN-NEXT:    s_setpc_b64 s[30:31]
364  %add = fadd double %a, %b
365  %fneg = fneg double %add
366  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
367  %insert.1 = insertvalue { double, double } %insert.0, double %add, 1
368  ret { double, double } %insert.1
369}
370
; Double-precision fneg(fadd a, b) where the sum has a second arithmetic use
; (fmul by 4.0).
; Default runs expect add, xor of the high half's sign bit, and multiply by
; 4.0; the instruction order and registers differ between hawaii and fiji, so
; each has its own checks. No-signed-zeros runs share one sequence: an add with
; both operands negated, then a multiply of that result by -4.0.
371define { double, double } @v_fneg_add_multi_use_add_f64(double %a, double %b) #0 {
372; SI-SAFE-LABEL: v_fneg_add_multi_use_add_f64:
373; SI-SAFE:       ; %bb.0:
374; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375; SI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
376; SI-SAFE-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
377; SI-SAFE-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
378; SI-SAFE-NEXT:    v_mov_b32_e32 v1, v4
379; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
380;
381; GCN-NSZ-LABEL: v_fneg_add_multi_use_add_f64:
382; GCN-NSZ:       ; %bb.0:
383; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GCN-NSZ-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
385; GCN-NSZ-NEXT:    v_mul_f64 v[2:3], v[0:1], -4.0
386; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
387;
388; VI-SAFE-LABEL: v_fneg_add_multi_use_add_f64:
389; VI-SAFE:       ; %bb.0:
390; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391; VI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
392; VI-SAFE-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
393; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
394; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
395  %add = fadd double %a, %b
396  %fneg = fneg double %add
397  %use1 = fmul double %add, 4.0
398
399  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
400  %insert.1 = insertvalue { double, double } %insert.0, double %use1, 1
401  ret { double, double } %insert.1
402}
403
; Double-precision fneg(fadd (fneg a), b).
; Default runs expect b + (-a) followed by an xor of the high half's sign bit;
; the no-signed-zeros runs expect the single add a + (-b).
404define double @v_fneg_add_fneg_x_f64(double %a, double %b) #0 {
405; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f64:
406; GCN-SAFE:       ; %bb.0:
407; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
408; GCN-SAFE-NEXT:    v_add_f64 v[0:1], v[2:3], -v[0:1]
409; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
410; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
411;
412; GCN-NSZ-LABEL: v_fneg_add_fneg_x_f64:
413; GCN-NSZ:       ; %bb.0:
414; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415; GCN-NSZ-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
416; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
417  %fneg.a = fneg double %a
418  %add = fadd double %fneg.a, %b
419  %fneg = fneg double %add
420  ret double %fneg
421}
422
; Double-precision fneg(fadd a, (fneg b)).
; Default runs expect a + (-b) followed by an xor of the high half's sign bit;
; the no-signed-zeros runs expect the single add b + (-a).
423define double @v_fneg_add_x_fneg_f64(double %a, double %b) #0 {
424; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f64:
425; GCN-SAFE:       ; %bb.0:
426; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
427; GCN-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], -v[2:3]
428; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
429; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
430;
431; GCN-NSZ-LABEL: v_fneg_add_x_fneg_f64:
432; GCN-NSZ:       ; %bb.0:
433; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GCN-NSZ-NEXT:    v_add_f64 v[0:1], v[2:3], -v[0:1]
435; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
436  %fneg.b = fneg double %b
437  %add = fadd double %a, %fneg.b
438  %fneg = fneg double %add
439  ret double %fneg
440}
441
; Double-precision fneg(fadd (fneg a), (fneg b)).
; Default runs expect (-a) + (-b) followed by an xor of the high half's sign
; bit; the no-signed-zeros runs expect the negations to cancel into a + b.
442define double @v_fneg_add_fneg_fneg_f64(double %a, double %b) #0 {
443; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f64:
444; GCN-SAFE:       ; %bb.0:
445; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
446; GCN-SAFE-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[2:3]
447; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
448; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
449;
450; GCN-NSZ-LABEL: v_fneg_add_fneg_fneg_f64:
451; GCN-NSZ:       ; %bb.0:
452; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453; GCN-NSZ-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
454; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
455  %fneg.a = fneg double %a
456  %fneg.b = fneg double %b
457  %add = fadd double %fneg.a, %fneg.b
458  %fneg = fneg double %add
459  ret double %fneg
460}
461
; Double-precision fneg(fadd (fneg a), b) where (fneg a) is also returned.
; Register assignment differs per target, so all four configurations have
; their own checks. Every variant expects an xor of a's high half to produce
; the returned -a. The default runs also expect b + (-a) with an xor of the
; sum's high half; the no-signed-zeros runs expect a + (-b) with no second xor.
462define { double, double } @v_fneg_add_store_use_fneg_x_f64(double %a, double %b) #0 {
463; SI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64:
464; SI-SAFE:       ; %bb.0:
465; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
466; SI-SAFE-NEXT:    v_mov_b32_e32 v5, v1
467; SI-SAFE-NEXT:    v_mov_b32_e32 v4, v0
468; SI-SAFE-NEXT:    v_add_f64 v[0:1], v[2:3], -v[4:5]
469; SI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v5
470; SI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
471; SI-SAFE-NEXT:    v_mov_b32_e32 v2, v4
472; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
473;
474; SI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f64:
475; SI-NSZ:       ; %bb.0:
476; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477; SI-NSZ-NEXT:    v_mov_b32_e32 v5, v1
478; SI-NSZ-NEXT:    v_mov_b32_e32 v4, v0
479; SI-NSZ-NEXT:    v_add_f64 v[0:1], v[4:5], -v[2:3]
480; SI-NSZ-NEXT:    v_xor_b32_e32 v3, 0x80000000, v5
481; SI-NSZ-NEXT:    v_mov_b32_e32 v2, v4
482; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
483;
484; VI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64:
485; VI-SAFE:       ; %bb.0:
486; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
487; VI-SAFE-NEXT:    v_add_f64 v[4:5], v[2:3], -v[0:1]
488; VI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
489; VI-SAFE-NEXT:    v_mov_b32_e32 v2, v0
490; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v5
491; VI-SAFE-NEXT:    v_mov_b32_e32 v0, v4
492; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
493;
494; VI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f64:
495; VI-NSZ:       ; %bb.0:
496; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
497; VI-NSZ-NEXT:    v_add_f64 v[4:5], v[0:1], -v[2:3]
498; VI-NSZ-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
499; VI-NSZ-NEXT:    v_mov_b32_e32 v2, v0
500; VI-NSZ-NEXT:    v_mov_b32_e32 v0, v4
501; VI-NSZ-NEXT:    v_mov_b32_e32 v1, v5
502; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
503  %fneg.a = fneg double %a
504  %add = fadd double %fneg.a, %b
505  %fneg = fneg double %add
506  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
507  %insert.1 = insertvalue { double, double } %insert.0, double %fneg.a, 1
508  ret { double, double } %insert.1
509}
510
; Double-precision fneg(fadd (fneg a), b) where (fneg a) has a second
; arithmetic use (fmul with %c). In every configuration the multiply is
; expected to take the negation as a negated operand (-v[0:1]).
; Default runs expect b + (-a) plus an xor of the high half's sign bit (with
; different register use on hawaii and fiji); the no-signed-zeros runs share a
; single sequence computing a + (-b).
511define { double, double } @v_fneg_add_multi_use_fneg_x_f64(double %a, double %b, double %c) #0 {
512; SI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64:
513; SI-SAFE:       ; %bb.0:
514; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515; SI-SAFE-NEXT:    v_add_f64 v[6:7], v[2:3], -v[0:1]
516; SI-SAFE-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[4:5]
517; SI-SAFE-NEXT:    v_xor_b32_e32 v7, 0x80000000, v7
518; SI-SAFE-NEXT:    v_mov_b32_e32 v0, v6
519; SI-SAFE-NEXT:    v_mov_b32_e32 v1, v7
520; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
521;
522; GCN-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f64:
523; GCN-NSZ:       ; %bb.0:
524; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525; GCN-NSZ-NEXT:    v_add_f64 v[6:7], v[0:1], -v[2:3]
526; GCN-NSZ-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[4:5]
527; GCN-NSZ-NEXT:    v_mov_b32_e32 v0, v6
528; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, v7
529; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
530;
531; VI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64:
532; VI-SAFE:       ; %bb.0:
533; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
534; VI-SAFE-NEXT:    v_add_f64 v[6:7], v[2:3], -v[0:1]
535; VI-SAFE-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[4:5]
536; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v7
537; VI-SAFE-NEXT:    v_mov_b32_e32 v0, v6
538; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
539  %fneg.a = fneg double %a
540  %add = fadd double %fneg.a, %b
541  %fneg = fneg double %add
542  %use1 = fmul double %fneg.a, %c
543
544  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
545  %insert.1 = insertvalue { double, double } %insert.0, double %use1, 1
546  ret { double, double } %insert.1
547}
548
549; This one asserted with -enable-no-signed-zeros-fp-math
; Double-precision amdgpu_ps function: 1.0 / %tmp6 is multiplied by 0.0 twice
; (the second fmul carries reassoc/nnan/arcp/contract flags), 0.0 is added, and
; the sum feeds both an unordered compare against %tmp2 and an fneg that is one
; arm of a select. The final select returns 0.0 or a quiet NaN (high half
; 0x7ff80000 in the expected output).
; Each of the four configurations has its own expected sequence. The default
; runs expect a v_mul_f64 by 0, a v_add_f64 of 0, and an xor of the high half's
; sign bit. The no-signed-zeros runs expect one v_mul_f64 by a constant built
; with s_mov_b32/s_brev_b32 and a compare that takes a negated operand.
550define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #0 {
551; SI-SAFE-LABEL: fneg_fadd_0_f64:
552; SI-SAFE:       ; %bb.0: ; %.entry
553; SI-SAFE-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
554; SI-SAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
555; SI-SAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
556; SI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
557; SI-SAFE-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
558; SI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
559; SI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
560; SI-SAFE-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
561; SI-SAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
562; SI-SAFE-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
563; SI-SAFE-NEXT:    v_mov_b32_e32 v2, s1
564; SI-SAFE-NEXT:    v_mov_b32_e32 v3, s0
565; SI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
566; SI-SAFE-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
567; SI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], 0
568; SI-SAFE-NEXT:    v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
569; SI-SAFE-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
570; SI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
571; SI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
572; SI-SAFE-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
573; SI-SAFE-NEXT:    s_and_b64 s[0:1], vcc, exec
574; SI-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
575; SI-SAFE-NEXT:    s_mov_b32 s0, 0
576; SI-SAFE-NEXT:    ; return to shader part epilog
577;
578; SI-NSZ-LABEL: fneg_fadd_0_f64:
579; SI-NSZ:       ; %bb.0: ; %.entry
580; SI-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
581; SI-NSZ-NEXT:    s_mov_b32 s4, 0
582; SI-NSZ-NEXT:    s_brev_b32 s5, 1
583; SI-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
584; SI-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
585; SI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
586; SI-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
587; SI-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
588; SI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
589; SI-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
590; SI-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
591; SI-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
592; SI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
593; SI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
594; SI-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
595; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[4:5]
596; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
597; SI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
598; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
599; SI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
600; SI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
601; SI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
602; SI-NSZ-NEXT:    s_mov_b32 s0, 0
603; SI-NSZ-NEXT:    ; return to shader part epilog
604;
605; VI-SAFE-LABEL: fneg_fadd_0_f64:
606; VI-SAFE:       ; %bb.0: ; %.entry
607; VI-SAFE-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
608; VI-SAFE-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
609; VI-SAFE-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
610; VI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
611; VI-SAFE-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
612; VI-SAFE-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
613; VI-SAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
614; VI-SAFE-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
615; VI-SAFE-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
616; VI-SAFE-NEXT:    v_mov_b32_e32 v4, s0
617; VI-SAFE-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
618; VI-SAFE-NEXT:    v_mov_b32_e32 v2, s1
619; VI-SAFE-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
620; VI-SAFE-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
621; VI-SAFE-NEXT:    v_add_f64 v[0:1], v[0:1], 0
622; VI-SAFE-NEXT:    v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1]
623; VI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v1
624; VI-SAFE-NEXT:    v_cndmask_b32_e32 v1, v3, v2, vcc
625; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
626; VI-SAFE-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
627; VI-SAFE-NEXT:    s_and_b64 s[0:1], vcc, exec
628; VI-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
629; VI-SAFE-NEXT:    s_mov_b32 s0, 0
630; VI-SAFE-NEXT:    ; return to shader part epilog
631;
632; VI-NSZ-LABEL: fneg_fadd_0_f64:
633; VI-NSZ:       ; %bb.0: ; %.entry
634; VI-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
635; VI-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
636; VI-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
637; VI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
638; VI-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
639; VI-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
640; VI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
641; VI-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
642; VI-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
643; VI-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
644; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
645; VI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
646; VI-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
647; VI-NSZ-NEXT:    s_mov_b32 s2, 0
648; VI-NSZ-NEXT:    s_brev_b32 s3, 1
649; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
650; VI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
651; VI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
652; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
653; VI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
654; VI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
655; VI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
656; VI-NSZ-NEXT:    s_mov_b32 s0, 0
657; VI-NSZ-NEXT:    ; return to shader part epilog
658.entry:
659  %tmp7 = fdiv double 1.000000e+00, %tmp6
660  %tmp8 = fmul double 0.000000e+00, %tmp7
661  %tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8
662  %.i188 = fadd double %tmp9, 0.000000e+00
663  %tmp10 = fcmp uge double %.i188, %tmp2
664  %tmp11 = fneg double %.i188
665  %.i092 = select i1 %tmp10, double %tmp2, double %tmp11
666  %tmp12 = fcmp ule double %.i092, 0.000000e+00
667  %.i198 = select i1 %tmp12, double 0.000000e+00, double 0x7FF8000000000000
668  ret double %.i198
669}
670
671; This is a workaround because -enable-no-signed-zeros-fp-math does not set up
672; function attribute unsafe-fp-math automatically. Combine with the previous test
673; when that is done.
; Same instruction chain as @fneg_fadd_0_f64, with two differences visible
; here: the fdiv carries the afn flag, and the function uses attribute group #2
; (defined outside this part of the file) instead of #0.
; The default runs share one expected sequence that contains no reciprocal or
; multiply at all, only compares and scalar selects. The no-signed-zeros runs
; expect v_rcp_f64 refined by a series of v_fma_f64 steps, then a multiply by a
; constant built with s_mov_b32/s_brev_b32; instruction order differs between
; hawaii and fiji, so each has its own checks.
674define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #2 {
675; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f64:
676; GCN-SAFE:       ; %bb.0: ; %.entry
677; GCN-SAFE-NEXT:    v_cmp_ngt_f64_e64 s[2:3], s[0:1], 0
678; GCN-SAFE-NEXT:    s_and_b64 s[2:3], s[2:3], exec
679; GCN-SAFE-NEXT:    s_cselect_b32 s1, s1, 0x80000000
680; GCN-SAFE-NEXT:    s_cselect_b32 s0, s0, 0
681; GCN-SAFE-NEXT:    v_cmp_ngt_f64_e64 s[0:1], s[0:1], 0
682; GCN-SAFE-NEXT:    s_and_b64 s[0:1], s[0:1], exec
683; GCN-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
684; GCN-SAFE-NEXT:    s_mov_b32 s0, 0
685; GCN-SAFE-NEXT:    ; return to shader part epilog
686;
687; SI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
688; SI-NSZ:       ; %bb.0: ; %.entry
689; SI-NSZ-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
690; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
691; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
692; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
693; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
694; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
695; SI-NSZ-NEXT:    s_mov_b32 s2, 0
696; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
697; SI-NSZ-NEXT:    s_brev_b32 s3, 1
698; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
699; SI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
700; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
701; SI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
702; SI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
703; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
704; SI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
705; SI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
706; SI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
707; SI-NSZ-NEXT:    s_mov_b32 s0, 0
708; SI-NSZ-NEXT:    ; return to shader part epilog
709;
710; VI-NSZ-LABEL: fneg_fadd_0_nsz_f64:
711; VI-NSZ:       ; %bb.0: ; %.entry
712; VI-NSZ-NEXT:    v_rcp_f64_e32 v[0:1], s[2:3]
713; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
714; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
715; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
716; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
717; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
718; VI-NSZ-NEXT:    s_mov_b32 s2, 0
719; VI-NSZ-NEXT:    s_brev_b32 s3, 1
720; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
721; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
722; VI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
723; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
724; VI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
725; VI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
726; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
727; VI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
728; VI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
729; VI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
730; VI-NSZ-NEXT:    s_mov_b32 s0, 0
731; VI-NSZ-NEXT:    ; return to shader part epilog
732.entry:
733  %tmp7 = fdiv afn double 1.000000e+00, %tmp6
734  %tmp8 = fmul double 0.000000e+00, %tmp7
735  %tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8
736  %.i188 = fadd double %tmp9, 0.000000e+00
737  %tmp10 = fcmp uge double %.i188, %tmp2
738  %tmp11 = fneg double %.i188
739  %.i092 = select i1 %tmp10, double %tmp2, double %tmp11
740  %tmp12 = fcmp ule double %.i092, 0.000000e+00
741  %.i198 = select i1 %tmp12, double 0.000000e+00, double 0x7FF8000000000000
742  ret double %.i198
743}
744
745; --------------------------------------------------------------------------------
746; fmul tests
747; --------------------------------------------------------------------------------
748
; fneg of a single-use fmul: the checks expect one v_mul_f32 with a negated
; source operand and no separate sign-bit flip.
749define float @v_fneg_mul_f32(float %a, float %b) #0 {
750; GCN-LABEL: v_fneg_mul_f32:
751; GCN:       ; %bb.0:
752; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
754; GCN-NEXT:    s_setpc_b64 s[30:31]
755  %mul = fmul float %a, %b
756  %fneg = fneg float %mul
757  ret float %fneg
758}
759
; Both the fmul result and its negation are returned, so the checks expect the
; multiply left un-negated plus a v_xor_b32 of the sign bit for the fneg.
760define { float, float } @v_fneg_mul_store_use_mul_f32(float %a, float %b) #0 {
761; GCN-LABEL: v_fneg_mul_store_use_mul_f32:
762; GCN:       ; %bb.0:
763; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
764; GCN-NEXT:    v_mul_f32_e32 v1, v0, v1
765; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
766; GCN-NEXT:    s_setpc_b64 s[30:31]
767  %mul = fmul float %a, %b
768  %fneg = fneg float %mul
769  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
770  %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1
771  ret { float, float } %insert.1
772}
773
; The fmul feeds both the fneg and a second multiply by 4.0: the checks expect
; the negated product in v0 and the other use rewritten as a multiply by -4.0.
774define { float, float } @v_fneg_mul_multi_use_mul_f32(float %a, float %b) #0 {
775; GCN-LABEL: v_fneg_mul_multi_use_mul_f32:
776; GCN:       ; %bb.0:
777; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
779; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
780; GCN-NEXT:    s_setpc_b64 s[30:31]
781  %mul = fmul float %a, %b
782  %fneg = fneg float %mul
783  %use1 = fmul float %mul, 4.0
784  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
785  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
786  ret { float, float } %insert.1
787}
788
; fneg(fmul(fneg a, b)): the two negations cancel, so the checks expect a plain
; v_mul_f32 with no source modifiers.
789define float @v_fneg_mul_fneg_x_f32(float %a, float %b) #0 {
790; GCN-LABEL: v_fneg_mul_fneg_x_f32:
791; GCN:       ; %bb.0:
792; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
794; GCN-NEXT:    s_setpc_b64 s[30:31]
795  %fneg.a = fneg float %a
796  %mul = fmul float %fneg.a, %b
797  %fneg = fneg float %mul
798  ret float %fneg
799}
800
; fneg(fmul(a, fneg b)): the two negations cancel, so the checks expect a plain
; v_mul_f32 with no source modifiers.
801define float @v_fneg_mul_x_fneg_f32(float %a, float %b) #0 {
802; GCN-LABEL: v_fneg_mul_x_fneg_f32:
803; GCN:       ; %bb.0:
804; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
805; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
806; GCN-NEXT:    s_setpc_b64 s[30:31]
807  %fneg.b = fneg float %b
808  %mul = fmul float %a, %fneg.b
809  %fneg = fneg float %mul
810  ret float %fneg
811}
812
; fneg(fmul(fneg a, fneg b)): three negations reduce to one, so the checks
; expect a single v_mul_f32 with one negated source operand.
813define float @v_fneg_mul_fneg_fneg_f32(float %a, float %b) #0 {
814; GCN-LABEL: v_fneg_mul_fneg_fneg_f32:
815; GCN:       ; %bb.0:
816; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
817; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
818; GCN-NEXT:    s_setpc_b64 s[30:31]
819  %fneg.a = fneg float %a
820  %fneg.b = fneg float %b
821  %mul = fmul float %fneg.a, %fneg.b
822  %fneg = fneg float %mul
823  ret float %fneg
824}
825
; fneg.a is returned alongside the negated product: the checks expect it
; materialized with v_xor_b32 while the multiply uses the un-negated inputs.
826define { float, float } @v_fneg_mul_store_use_fneg_x_f32(float %a, float %b) #0 {
827; GCN-LABEL: v_fneg_mul_store_use_fneg_x_f32:
828; GCN:       ; %bb.0:
829; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
831; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
832; GCN-NEXT:    v_mov_b32_e32 v1, v2
833; GCN-NEXT:    s_setpc_b64 s[30:31]
834  %fneg.a = fneg float %a
835  %mul = fmul float %fneg.a, %b
836  %fneg = fneg float %mul
837  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
838  %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
839  ret { float, float } %insert.1
840}
841
; fneg.a has a second fmul user (%use1): the checks expect the outer fneg to
; cancel (plain v0 * v1) and the other multiply to take -v0 as a source modifier.
842define { float, float } @v_fneg_mul_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
843; GCN-LABEL: v_fneg_mul_multi_use_fneg_x_f32:
844; GCN:       ; %bb.0:
845; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
846; GCN-NEXT:    v_mul_f32_e32 v3, v0, v1
847; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v2
848; GCN-NEXT:    v_mov_b32_e32 v0, v3
849; GCN-NEXT:    s_setpc_b64 s[30:31]
850  %fneg.a = fneg float %a
851  %mul = fmul float %fneg.a, %b
852  %fneg = fneg float %mul
853  %use1 = fmul float %fneg.a, %c
854  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
855  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
856  ret { float, float } %insert.1
857}
858
859; --------------------------------------------------------------------------------
860; fminnum tests
861; --------------------------------------------------------------------------------
862
; fneg(minnum(a, b)): the checks expect each input multiplied by -1.0 and a
; v_max_f32 in place of the min, with no separate sign-bit flip.
863define float @v_fneg_minnum_f32_ieee(float %a, float %b) #0 {
864; GCN-LABEL: v_fneg_minnum_f32_ieee:
865; GCN:       ; %bb.0:
866; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
868; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
869; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
870; GCN-NEXT:    s_setpc_b64 s[30:31]
871  %min = call float @llvm.minnum.f32(float %a, float %b)
872  %fneg = fneg float %min
873  ret float %fneg
874}
875
; fneg(minnum(a, b)) with attribute group #4: the checks expect a single
; v_max_f32 with both sources negated.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
876define float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #4 {
877; GCN-LABEL: v_fneg_minnum_f32_no_ieee:
878; GCN:       ; %bb.0:
879; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
880; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
881; GCN-NEXT:    s_setpc_b64 s[30:31]
882  %min = call float @llvm.minnum.f32(float %a, float %b)
883  %fneg = fneg float %min
884  ret float %fneg
885}
886
; fneg(minnum(a, a)): the checks expect only a v_xor_b32 of the sign bit, with
; no min/max instruction emitted.
887define float @v_fneg_self_minnum_f32_ieee(float %a) #0 {
888; GCN-LABEL: v_fneg_self_minnum_f32_ieee:
889; GCN:       ; %bb.0:
890; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
891; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
892; GCN-NEXT:    s_setpc_b64 s[30:31]
893  %min = call float @llvm.minnum.f32(float %a, float %a)
894  %min.fneg = fneg float %min
895  ret float %min.fneg
896}
897
; fneg(minnum(a, a)) with attribute group #4: the checks expect only a
; v_xor_b32 of the sign bit, with no min/max instruction emitted.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
898define float @v_fneg_self_minnum_f32_no_ieee(float %a) #4 {
899; GCN-LABEL: v_fneg_self_minnum_f32_no_ieee:
900; GCN:       ; %bb.0:
901; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
903; GCN-NEXT:    s_setpc_b64 s[30:31]
904  %min = call float @llvm.minnum.f32(float %a, float %a)
905  %min.fneg = fneg float %min
906  ret float %min.fneg
907}
908
; fneg(minnum(4.0, a)): the checks expect a multiplied by -1.0 and then a
; v_max_f32 against the negated constant -4.0.
909define float @v_fneg_posk_minnum_f32_ieee(float %a) #0 {
910; GCN-LABEL: v_fneg_posk_minnum_f32_ieee:
911; GCN:       ; %bb.0:
912; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
914; GCN-NEXT:    v_max_f32_e32 v0, -4.0, v0
915; GCN-NEXT:    s_setpc_b64 s[30:31]
916  %min = call float @llvm.minnum.f32(float 4.0, float %a)
917  %fneg = fneg float %min
918  ret float %fneg
919}
920
; fneg(minnum(4.0, a)) with attribute group #4: the checks expect a single
; v_max_f32 taking -v0 and the negated constant -4.0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
921define float @v_fneg_posk_minnum_f32_no_ieee(float %a) #4 {
922; GCN-LABEL: v_fneg_posk_minnum_f32_no_ieee:
923; GCN:       ; %bb.0:
924; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925; GCN-NEXT:    v_max_f32_e64 v0, -v0, -4.0
926; GCN-NEXT:    s_setpc_b64 s[30:31]
927  %min = call float @llvm.minnum.f32(float 4.0, float %a)
928  %fneg = fneg float %min
929  ret float %fneg
930}
931
; fneg(minnum(-4.0, a)): the checks expect a multiplied by -1.0 and then a
; v_max_f32 against the negated constant 4.0.
932define float @v_fneg_negk_minnum_f32_ieee(float %a) #0 {
933; GCN-LABEL: v_fneg_negk_minnum_f32_ieee:
934; GCN:       ; %bb.0:
935; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
937; GCN-NEXT:    v_max_f32_e32 v0, 4.0, v0
938; GCN-NEXT:    s_setpc_b64 s[30:31]
939  %min = call float @llvm.minnum.f32(float -4.0, float %a)
940  %fneg = fneg float %min
941  ret float %fneg
942}
943
; fneg(minnum(-4.0, a)) with attribute group #4: the checks expect a single
; v_max_f32 taking -v0 and the negated constant 4.0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
944define float @v_fneg_negk_minnum_f32_no_ieee(float %a) #4 {
945; GCN-LABEL: v_fneg_negk_minnum_f32_no_ieee:
946; GCN:       ; %bb.0:
947; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948; GCN-NEXT:    v_max_f32_e64 v0, -v0, 4.0
949; GCN-NEXT:    s_setpc_b64 s[30:31]
950  %min = call float @llvm.minnum.f32(float -4.0, float %a)
951  %fneg = fneg float %min
952  ret float %fneg
953}
954
; fneg(nnan minnum(0.0, a)): the checks expect the v_min_f32 against 0 to be
; kept and the fneg emitted separately as a v_xor_b32 (not folded into a max).
955define float @v_fneg_0_minnum_f32(float %a) #0 {
956; GCN-LABEL: v_fneg_0_minnum_f32:
957; GCN:       ; %bb.0:
958; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
959; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
960; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
961; GCN-NEXT:    s_setpc_b64 s[30:31]
962  %min = call nnan float @llvm.minnum.f32(float 0.0, float %a)
963  %fneg = fneg float %min
964  ret float %fneg
965}
966
; fneg(minnum(-0.0, a)): the checks expect a multiplied by -1.0 and then a
; v_max_f32 against 0 (the negated -0.0 constant).
967define float @v_fneg_neg0_minnum_f32_ieee(float %a) #0 {
968; GCN-LABEL: v_fneg_neg0_minnum_f32_ieee:
969; GCN:       ; %bb.0:
970; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
972; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
973; GCN-NEXT:    s_setpc_b64 s[30:31]
974  %min = call float @llvm.minnum.f32(float -0.0, float %a)
975  %fneg = fneg float %min
976  ret float %fneg
977}
978
; fneg(minnum(K, a)) with K = 0x3FC45F3060000000 (printed as 0.15915494 in the
; fiji checks). The hawaii checks expect the negated form, a v_max_f32 against the
; literal 0xbe22f983; the fiji checks expect v_min_f32 with the 0.15915494 operand
; followed by a separate v_xor_b32 sign-bit flip.
979define float @v_fneg_inv2pi_minnum_f32(float %a) #0 {
980; SI-LABEL: v_fneg_inv2pi_minnum_f32:
981; SI:       ; %bb.0:
982; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983; SI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
984; SI-NEXT:    v_max_f32_e32 v0, 0xbe22f983, v0
985; SI-NEXT:    s_setpc_b64 s[30:31]
986;
987; VI-LABEL: v_fneg_inv2pi_minnum_f32:
988; VI:       ; %bb.0:
989; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
990; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
991; VI-NEXT:    v_min_f32_e32 v0, 0.15915494, v0
992; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
993; VI-NEXT:    s_setpc_b64 s[30:31]
994  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
995  %fneg = fneg float %min
996  ret float %fneg
997}
998
; fneg(minnum(K, a)) with the negative constant K = 0xBFC45F3060000000. Both
; targets are expected to multiply a by -1.0 and use v_max_f32; the hawaii checks
; spell the negated constant as the literal 0x3e22f983, the fiji checks as 0.15915494.
999define float @v_fneg_neg_inv2pi_minnum_f32(float %a) #0 {
1000; SI-LABEL: v_fneg_neg_inv2pi_minnum_f32:
1001; SI:       ; %bb.0:
1002; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1003; SI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1004; SI-NEXT:    v_max_f32_e32 v0, 0x3e22f983, v0
1005; SI-NEXT:    s_setpc_b64 s[30:31]
1006;
1007; VI-LABEL: v_fneg_neg_inv2pi_minnum_f32:
1008; VI:       ; %bb.0:
1009; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010; VI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1011; VI-NEXT:    v_max_f32_e32 v0, 0.15915494, v0
1012; VI-NEXT:    s_setpc_b64 s[30:31]
1013  %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a)
1014  %fneg = fneg float %min
1015  ret float %fneg
1016}
1017
; Half-precision fneg(minnum(0xH3118, a)). The hawaii checks expect the value
; converted to f16 with a negated source and back to f32, then a v_max_f32 against
; the literal 0xbe230000; the fiji checks expect f16 instructions (v_max_f16 of the
; input with itself, v_min_f16 with 0.15915494) and a v_xor_b32 of bit 0x8000.
1018define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
1019; SI-LABEL: v_fneg_inv2pi_minnum_f16:
1020; SI:       ; %bb.0:
1021; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1022; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1023; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1024; SI-NEXT:    v_max_f32_e32 v0, 0xbe230000, v0
1025; SI-NEXT:    s_setpc_b64 s[30:31]
1026;
1027; VI-LABEL: v_fneg_inv2pi_minnum_f16:
1028; VI:       ; %bb.0:
1029; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1030; VI-NEXT:    v_max_f16_e32 v0, v0, v0
1031; VI-NEXT:    v_min_f16_e32 v0, 0.15915494, v0
1032; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1033; VI-NEXT:    s_setpc_b64 s[30:31]
1034  %min = call half @llvm.minnum.f16(half 0xH3118, half %a)
1035  %fneg = fneg half %min
1036  ret half %fneg
1037}
1038
; Half-precision fneg(minnum(0xHB118, a)) with a negative constant. The hawaii
; checks expect a negated f32->f16->f32 round trip and a v_max_f32 against the
; literal 0x3e230000; the fiji checks expect v_max_f16 of -v0 with itself followed
; by v_max_f16 with 0.15915494, with no separate sign-bit flip.
1039define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
1040; SI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
1041; SI:       ; %bb.0:
1042; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1043; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1044; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1045; SI-NEXT:    v_max_f32_e32 v0, 0x3e230000, v0
1046; SI-NEXT:    s_setpc_b64 s[30:31]
1047;
1048; VI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
1049; VI:       ; %bb.0:
1050; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
1052; VI-NEXT:    v_max_f16_e32 v0, 0.15915494, v0
1053; VI-NEXT:    s_setpc_b64 s[30:31]
1054  %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
1055  %fneg = fneg half %min
1056  ret half %fneg
1057}
1058
; Double-precision fneg(minnum(0x3fc45f306dc9c882, a)). The hawaii checks expect
; the negated form: v_max_f64 of the negated input with itself, then v_max_f64
; against the negated constant built in s[4:5]. The fiji checks expect v_min_f64
; with the 0.15915494309189532 operand and a v_xor_b32 on the high dword (v1).
1059define double @v_fneg_inv2pi_minnum_f64(double %a) #0 {
1060; SI-LABEL: v_fneg_inv2pi_minnum_f64:
1061; SI:       ; %bb.0:
1062; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1064; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
1065; SI-NEXT:    s_mov_b32 s5, 0xbfc45f30
1066; SI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
1067; SI-NEXT:    s_setpc_b64 s[30:31]
1068;
1069; VI-LABEL: v_fneg_inv2pi_minnum_f64:
1070; VI:       ; %bb.0:
1071; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1072; VI-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
1073; VI-NEXT:    v_min_f64 v[0:1], v[0:1], 0.15915494309189532
1074; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1075; VI-NEXT:    s_setpc_b64 s[30:31]
1076  %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a)
1077  %fneg = fneg double %min
1078  ret double %fneg
1079}
1080
; Double-precision fneg(minnum(0xbfc45f306dc9c882, a)) with a negative constant.
; Both targets are expected to negate the input via v_max_f64 -v, -v and then use
; v_max_f64; the hawaii checks build the positive constant in s[4:5], the fiji
; checks use the 0.15915494309189532 operand directly.
1081define double @v_fneg_neg_inv2pi_minnum_f64(double %a) #0 {
1082; SI-LABEL: v_fneg_neg_inv2pi_minnum_f64:
1083; SI:       ; %bb.0:
1084; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1085; SI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1086; SI-NEXT:    s_mov_b32 s4, 0x6dc9c882
1087; SI-NEXT:    s_mov_b32 s5, 0x3fc45f30
1088; SI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
1089; SI-NEXT:    s_setpc_b64 s[30:31]
1090;
1091; VI-LABEL: v_fneg_neg_inv2pi_minnum_f64:
1092; VI:       ; %bb.0:
1093; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1094; VI-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
1095; VI-NEXT:    v_max_f64 v[0:1], v[0:1], 0.15915494309189532
1096; VI-NEXT:    s_setpc_b64 s[30:31]
1097  %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a)
1098  %fneg = fneg double %min
1099  ret double %fneg
1100}
1101
; fneg(minnum(-0.0, a)) with attribute group #4: the checks expect a single
; v_max_f32 taking -v0 and 0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1102define float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #4 {
1103; GCN-LABEL: v_fneg_neg0_minnum_f32_no_ieee:
1104; GCN:       ; %bb.0:
1105; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GCN-NEXT:    v_max_f32_e64 v0, -v0, 0
1107; GCN-NEXT:    s_setpc_b64 s[30:31]
1108  %min = call float @llvm.minnum.f32(float -0.0, float %a)
1109  %fneg = fneg float %min
1110  ret float %fneg
1111}
1112
; fneg(minnum(0.0, a)) feeding an fmul: the checks expect a multiply by 1.0, the
; v_min_f32 against 0 kept as a min, and the negation applied as a -v0 source
; modifier on the final v_mul_f32.
1113define float @v_fneg_0_minnum_foldable_use_f32_ieee(float %a, float %b) #0 {
1114; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_ieee:
1115; GCN:       ; %bb.0:
1116; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1117; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1118; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
1119; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1120; GCN-NEXT:    s_setpc_b64 s[30:31]
1121  %min = call float @llvm.minnum.f32(float 0.0, float %a)
1122  %fneg = fneg float %min
1123  %mul = fmul float %fneg, %b
1124  ret float %mul
1125}
1126
; fneg(minnum(0x3FC45F3060000000, a)) feeding an fmul. The hawaii checks expect
; the negation pushed into the min (multiply by -1.0, v_max_f32 against the literal
; 0xbe22f983) and a plain final multiply; the fiji checks expect v_min_f32 with the
; 0.15915494 operand and the negation as a -v0 modifier on the final multiply.
1127define float @v_fneg_inv2pi_minnum_foldable_use_f32(float %a, float %b) #0 {
1128; SI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32:
1129; SI:       ; %bb.0:
1130; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131; SI-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1132; SI-NEXT:    v_max_f32_e32 v0, 0xbe22f983, v0
1133; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
1134; SI-NEXT:    s_setpc_b64 s[30:31]
1135;
1136; VI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32:
1137; VI:       ; %bb.0:
1138; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1139; VI-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1140; VI-NEXT:    v_min_f32_e32 v0, 0.15915494, v0
1141; VI-NEXT:    v_mul_f32_e64 v0, -v0, v1
1142; VI-NEXT:    s_setpc_b64 s[30:31]
1143  %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a)
1144  %fneg = fneg float %min
1145  %mul = fmul float %fneg, %b
1146  ret float %mul
1147}
1148
; fneg(minnum(0.0, a)) feeding an fmul, with attribute group #4: the checks
; expect v_min_f32 against 0 and the negation as a -v0 modifier on the final
; v_mul_f32, with no leading multiply by 1.0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1149define float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
1150; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_no_ieee:
1151; GCN:       ; %bb.0:
1152; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
1154; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1155; GCN-NEXT:    s_setpc_b64 s[30:31]
1156  %min = call float @llvm.minnum.f32(float 0.0, float %a)
1157  %fneg = fneg float %min
1158  %mul = fmul float %fneg, %b
1159  ret float %mul
1160}
1161
; The minnum result feeds both the fneg and a multiply by 4.0: the checks expect
; the negated form (inputs multiplied by -1.0, then v_max_f32) in v0 and the other
; use rewritten as a multiply of that value by -4.0.
1162define { float, float } @v_fneg_minnum_multi_use_minnum_f32_ieee(float %a, float %b) #0 {
1163; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_ieee:
1164; GCN:       ; %bb.0:
1165; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1167; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1168; GCN-NEXT:    v_max_f32_e32 v0, v0, v1
1169; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1170; GCN-NEXT:    s_setpc_b64 s[30:31]
1171  %min = call float @llvm.minnum.f32(float %a, float %b)
1172  %fneg = fneg float %min
1173  %use1 = fmul float %min, 4.0
1174  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1175  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1176  ret { float, float } %insert.1
1177}
1178
; The minnum result feeds both the fneg and a multiply by 4.0, with attribute
; group #4: the checks expect a single v_max_f32 with both sources negated in v0
; and the other use rewritten as a multiply of that value by -4.0.
; The result vector is built on a poison base (not the deprecated undef); both
; lanes are overwritten, so the base value never reaches the output.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1179define <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #4 {
1180; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_no_ieee:
1181; GCN:       ; %bb.0:
1182; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1183; GCN-NEXT:    v_max_f32_e64 v0, -v0, -v1
1184; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1185; GCN-NEXT:    s_setpc_b64 s[30:31]
1186  %min = call float @llvm.minnum.f32(float %a, float %b)
1187  %fneg = fneg float %min
1188  %use1 = fmul float %min, 4.0
1189  %ins0 = insertelement <2 x float> poison, float %fneg, i32 0
1190  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1191  ret <2 x float> %ins1
1192}
1193
1194; --------------------------------------------------------------------------------
1195; fmaxnum tests
1196; --------------------------------------------------------------------------------
1197
; fneg(maxnum(a, b)): the checks expect each input multiplied by -1.0 and a
; v_min_f32 in place of the max, with no separate sign-bit flip.
1198define float @v_fneg_maxnum_f32_ieee(float %a, float %b) #0 {
1199; GCN-LABEL: v_fneg_maxnum_f32_ieee:
1200; GCN:       ; %bb.0:
1201; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1202; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1203; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1204; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
1205; GCN-NEXT:    s_setpc_b64 s[30:31]
1206  %max = call float @llvm.maxnum.f32(float %a, float %b)
1207  %fneg = fneg float %max
1208  ret float %fneg
1209}
1210
; fneg(maxnum(a, b)) with attribute group #4: the checks expect a single
; v_min_f32 with both sources negated.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1211define float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #4 {
1212; GCN-LABEL: v_fneg_maxnum_f32_no_ieee:
1213; GCN:       ; %bb.0:
1214; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
1216; GCN-NEXT:    s_setpc_b64 s[30:31]
1217  %max = call float @llvm.maxnum.f32(float %a, float %b)
1218  %fneg = fneg float %max
1219  ret float %fneg
1220}
1221
; fneg(maxnum(a, a)): the checks expect only a v_xor_b32 of the sign bit, with
; no min/max instruction emitted.
1222define float @v_fneg_self_maxnum_f32_ieee(float %a) #0 {
1223; GCN-LABEL: v_fneg_self_maxnum_f32_ieee:
1224; GCN:       ; %bb.0:
1225; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1226; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1227; GCN-NEXT:    s_setpc_b64 s[30:31]
1228  %max = call float @llvm.maxnum.f32(float %a, float %a)
1229  %max.fneg = fneg float %max
1230  ret float %max.fneg
1231}
1232
; fneg(maxnum(a, a)) with attribute group #4: the checks expect only a
; v_xor_b32 of the sign bit, with no min/max instruction emitted.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1233define float @v_fneg_self_maxnum_f32_no_ieee(float %a) #4 {
1234; GCN-LABEL: v_fneg_self_maxnum_f32_no_ieee:
1235; GCN:       ; %bb.0:
1236; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1237; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1238; GCN-NEXT:    s_setpc_b64 s[30:31]
1239  %max = call float @llvm.maxnum.f32(float %a, float %a)
1240  %max.fneg = fneg float %max
1241  ret float %max.fneg
1242}
1243
; fneg(maxnum(4.0, a)): the checks expect a multiplied by -1.0 and then a
; v_min_f32 against the negated constant -4.0.
1244define float @v_fneg_posk_maxnum_f32_ieee(float %a) #0 {
1245; GCN-LABEL: v_fneg_posk_maxnum_f32_ieee:
1246; GCN:       ; %bb.0:
1247; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1249; GCN-NEXT:    v_min_f32_e32 v0, -4.0, v0
1250; GCN-NEXT:    s_setpc_b64 s[30:31]
1251  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
1252  %fneg = fneg float %max
1253  ret float %fneg
1254}
1255
; fneg(maxnum(4.0, a)) with attribute group #4: the checks expect a single
; v_min_f32 taking -v0 and the negated constant -4.0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1256define float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #4 {
1257; GCN-LABEL: v_fneg_posk_maxnum_f32_no_ieee:
1258; GCN:       ; %bb.0:
1259; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1260; GCN-NEXT:    v_min_f32_e64 v0, -v0, -4.0
1261; GCN-NEXT:    s_setpc_b64 s[30:31]
1262  %max = call float @llvm.maxnum.f32(float 4.0, float %a)
1263  %fneg = fneg float %max
1264  ret float %fneg
1265}
1266
; fneg(maxnum(-4.0, a)): the checks expect a multiplied by -1.0 and then a
; v_min_f32 against the negated constant 4.0.
1267define float @v_fneg_negk_maxnum_f32_ieee(float %a) #0 {
1268; GCN-LABEL: v_fneg_negk_maxnum_f32_ieee:
1269; GCN:       ; %bb.0:
1270; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1271; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1272; GCN-NEXT:    v_min_f32_e32 v0, 4.0, v0
1273; GCN-NEXT:    s_setpc_b64 s[30:31]
1274  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
1275  %fneg = fneg float %max
1276  ret float %fneg
1277}
1278
; fneg(maxnum(-4.0, a)) with attribute group #4: the checks expect a single
; v_min_f32 taking -v0 and the negated constant 4.0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1279define float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #4 {
1280; GCN-LABEL: v_fneg_negk_maxnum_f32_no_ieee:
1281; GCN:       ; %bb.0:
1282; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1283; GCN-NEXT:    v_min_f32_e64 v0, -v0, 4.0
1284; GCN-NEXT:    s_setpc_b64 s[30:31]
1285  %max = call float @llvm.maxnum.f32(float -4.0, float %a)
1286  %fneg = fneg float %max
1287  ret float %fneg
1288}
1289
; fneg(nnan maxnum(0.0, a)): the checks expect the v_max_f32 against 0 to be
; kept and the fneg emitted separately as a v_xor_b32 (not folded into a min).
1290define float @v_fneg_0_maxnum_f32(float %a) #0 {
1291; GCN-LABEL: v_fneg_0_maxnum_f32:
1292; GCN:       ; %bb.0:
1293; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
1295; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1296; GCN-NEXT:    s_setpc_b64 s[30:31]
1297  %max = call nnan float @llvm.maxnum.f32(float 0.0, float %a)
1298  %fneg = fneg float %max
1299  ret float %fneg
1300}
1301
; fneg(maxnum(-0.0, a)): the checks expect a multiplied by -1.0 and then a
; v_min_f32 against 0 (the negated -0.0 constant).
1302define float @v_fneg_neg0_maxnum_f32_ieee(float %a) #0 {
1303; GCN-LABEL: v_fneg_neg0_maxnum_f32_ieee:
1304; GCN:       ; %bb.0:
1305; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1306; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1307; GCN-NEXT:    v_min_f32_e32 v0, 0, v0
1308; GCN-NEXT:    s_setpc_b64 s[30:31]
1309  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
1310  %fneg = fneg float %max
1311  ret float %fneg
1312}
1313
; fneg(maxnum(-0.0, a)) with attribute group #4: the checks expect a single
; v_min_f32 taking -v0 and 0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1314define float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #4 {
1315; GCN-LABEL: v_fneg_neg0_maxnum_f32_no_ieee:
1316; GCN:       ; %bb.0:
1317; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1318; GCN-NEXT:    v_min_f32_e64 v0, -v0, 0
1319; GCN-NEXT:    s_setpc_b64 s[30:31]
1320  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
1321  %fneg = fneg float %max
1322  ret float %fneg
1323}
1324
; fneg(maxnum(0.0, a)) feeding an fmul: the checks expect a multiply by 1.0, the
; v_max_f32 against 0 kept as a max, and the negation applied as a -v0 source
; modifier on the final v_mul_f32.
1325define float @v_fneg_0_maxnum_foldable_use_f32_ieee(float %a, float %b) #0 {
1326; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_ieee:
1327; GCN:       ; %bb.0:
1328; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1329; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
1330; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
1331; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1332; GCN-NEXT:    s_setpc_b64 s[30:31]
1333  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1334  %fneg = fneg float %max
1335  %mul = fmul float %fneg, %b
1336  ret float %mul
1337}
1338
; fneg(maxnum(0.0, a)) feeding an fmul, with attribute group #4: the checks
; expect v_max_f32 against 0 and the negation as a -v0 modifier on the final
; v_mul_f32, with no leading multiply by 1.0.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1339define float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #4 {
1340; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_no_ieee:
1341; GCN:       ; %bb.0:
1342; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343; GCN-NEXT:    v_max_f32_e32 v0, 0, v0
1344; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
1345; GCN-NEXT:    s_setpc_b64 s[30:31]
1346  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
1347  %fneg = fneg float %max
1348  %mul = fmul float %fneg, %b
1349  ret float %mul
1350}
1351
; The maxnum result feeds both the fneg and a multiply by 4.0: the checks expect
; the negated form (inputs multiplied by -1.0, then v_min_f32) in v0 and the other
; use rewritten as a multiply of that value by -4.0.
1352define { float, float } @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float %a, float %b) #0 {
1353; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_ieee:
1354; GCN:       ; %bb.0:
1355; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356; GCN-NEXT:    v_mul_f32_e32 v1, -1.0, v1
1357; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
1358; GCN-NEXT:    v_min_f32_e32 v0, v0, v1
1359; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1360; GCN-NEXT:    s_setpc_b64 s[30:31]
1361  %max = call float @llvm.maxnum.f32(float %a, float %b)
1362  %fneg = fneg float %max
1363  %use1 = fmul float %max, 4.0
1364  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1365  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1366  ret { float, float } %insert.1
1367}
1368
; The maxnum result feeds both the fneg and a multiply by 4.0, with attribute
; group #4: the checks expect a single v_min_f32 with both sources negated in v0
; and the other use rewritten as a multiply of that value by -4.0.
; The result vector is built on a poison base (not the deprecated undef); both
; lanes are overwritten, so the base value never reaches the output.
; NOTE(review): #4 is defined outside this chunk; presumably it turns IEEE mode off - confirm.
1369define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #4 {
1370; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_no_ieee:
1371; GCN:       ; %bb.0:
1372; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1373; GCN-NEXT:    v_min_f32_e64 v0, -v0, -v1
1374; GCN-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1375; GCN-NEXT:    s_setpc_b64 s[30:31]
1376  %max = call float @llvm.maxnum.f32(float %a, float %b)
1377  %fneg = fneg float %max
1378  %use1 = fmul float %max, 4.0
1379  %ins0 = insertelement <2 x float> poison, float %fneg, i32 0
1380  %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1
1381  ret <2 x float> %ins1
1382}
1383
1384; --------------------------------------------------------------------------------
1385; fma tests
1386; --------------------------------------------------------------------------------
1387
; fneg of a single-use fma. Runs without -enable-no-signed-zeros-fp-math expect
; the fma followed by a v_xor_b32 sign-bit flip; runs with the flag expect one
; v_fma_f32 with the second multiplicand and the addend negated.
1388define float @v_fneg_fma_f32(float %a, float %b, float %c) #0 {
1389; GCN-SAFE-LABEL: v_fneg_fma_f32:
1390; GCN-SAFE:       ; %bb.0:
1391; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
1393; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1394; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1395;
1396; GCN-NSZ-LABEL: v_fneg_fma_f32:
1397; GCN-NSZ:       ; %bb.0:
1398; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1399; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1400; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1401  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1402  %fneg = fneg float %fma
1403  ret float %fneg
1404}
1405
; Both the fma result and its negation are returned, so every run configuration
; shares one expectation: the fma left un-negated plus a v_xor_b32 sign-bit flip.
1406define { float, float } @v_fneg_fma_store_use_fma_f32(float %a, float %b, float %c) #0 {
1407; GCN-LABEL: v_fneg_fma_store_use_fma_f32:
1408; GCN:       ; %bb.0:
1409; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1410; GCN-NEXT:    v_fma_f32 v1, v0, v1, v2
1411; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1412; GCN-NEXT:    s_setpc_b64 s[30:31]
1413  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1414  %fneg = fneg float %fma
1415  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1416  %insert.1 = insertvalue { float, float } %insert.0, float %fma, 1
1417  ret { float, float } %insert.1
1418}
1419
; The fma feeds both the fneg and a multiply by 4.0. Runs without
; -enable-no-signed-zeros-fp-math expect the fma, a v_xor_b32 and a multiply by
; 4.0; runs with the flag expect a negated fma in v0 and the other use rewritten
; as a multiply of that value by -4.0.
1420define { float, float } @v_fneg_fma_multi_use_fma_f32(float %a, float %b, float %c) #0 {
1421; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fma_f32:
1422; GCN-SAFE:       ; %bb.0:
1423; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424; GCN-SAFE-NEXT:    v_fma_f32 v1, v0, v1, v2
1425; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1426; GCN-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
1427; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1428;
1429; GCN-NSZ-LABEL: v_fneg_fma_multi_use_fma_f32:
1430; GCN-NSZ:       ; %bb.0:
1431; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1432; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1433; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1434; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1435  %fma = call float @llvm.fma.f32(float %a, float %b, float %c)
1436  %fneg = fneg float %fma
1437  %use1 = fmul float %fma, 4.0
1438  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1439  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1440  ret { float, float } %insert.1
1441}
1442
; fneg(fma(fneg a, b, c)). Runs without -enable-no-signed-zeros-fp-math expect
; the fma with -v0 followed by a v_xor_b32; runs with the flag expect one
; v_fma_f32 with only the addend negated.
1443define float @v_fneg_fma_fneg_x_y_f32(float %a, float %b, float %c) #0 {
1444; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_y_f32:
1445; GCN-SAFE:       ; %bb.0:
1446; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1447; GCN-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, v2
1448; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1449; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1450;
1451; GCN-NSZ-LABEL: v_fneg_fma_fneg_x_y_f32:
1452; GCN-NSZ:       ; %bb.0:
1453; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1454; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
1455; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1456  %fneg.a = fneg float %a
1457  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1458  %fneg = fneg float %fma
1459  ret float %fneg
1460}
1461
; fneg(fma(a, fneg b, c)). Runs without -enable-no-signed-zeros-fp-math expect
; the fma with -v1 followed by a v_xor_b32; runs with the flag expect one
; v_fma_f32 with only the addend negated.
1462define float @v_fneg_fma_x_fneg_y_f32(float %a, float %b, float %c) #0 {
1463; GCN-SAFE-LABEL: v_fneg_fma_x_fneg_y_f32:
1464; GCN-SAFE:       ; %bb.0:
1465; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1466; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, -v1, v2
1467; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1468; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1469;
1470; GCN-NSZ-LABEL: v_fneg_fma_x_fneg_y_f32:
1471; GCN-NSZ:       ; %bb.0:
1472; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
1474; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1475  %fneg.b = fneg float %b
1476  %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c)
1477  %fneg = fneg float %fma
1478  ret float %fneg
1479}
1480
; fneg(fma(fneg a, fneg b, c)). Runs without -enable-no-signed-zeros-fp-math
; expect a plain fma (the multiplicand negations cancel) followed by a v_xor_b32;
; runs with the flag expect one v_fma_f32 with -v1 and -v2.
1481define float @v_fneg_fma_fneg_fneg_y_f32(float %a, float %b, float %c) #0 {
1482; GCN-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f32:
1483; GCN-SAFE:       ; %bb.0:
1484; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1485; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
1486; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1487; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1488;
1489; GCN-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f32:
1490; GCN-NSZ:       ; %bb.0:
1491; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1492; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1493; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1494  %fneg.a = fneg float %a
1495  %fneg.b = fneg float %b
1496  %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c)
1497  %fneg = fneg float %fma
1498  ret float %fneg
1499}
1500
; fneg(fma(fneg a, b, fneg c)). Runs without -enable-no-signed-zeros-fp-math
; expect the fma with -v0 and -v2 followed by a v_xor_b32; runs with the flag
; expect a plain v_fma_f32 with no source modifiers.
1501define float @v_fneg_fma_fneg_x_fneg_f32(float %a, float %b, float %c) #0 {
1502; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f32:
1503; GCN-SAFE:       ; %bb.0:
1504; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1505; GCN-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, -v2
1506; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1507; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1508;
1509; GCN-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f32:
1510; GCN-NSZ:       ; %bb.0:
1511; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1512; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, v2
1513; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1514  %fneg.a = fneg float %a
1515  %fneg.c = fneg float %c
1516  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c)
1517  %fneg = fneg float %fma
1518  ret float %fneg
1519}
1520
; fneg of fma(%a, %b, -%c). The SAFE checks expect an fma with a -v2 source
; modifier followed by a sign-bit v_xor; the NSZ checks expect a single fma
; with the modifier moved to the second operand (v0, -v1, v2).
1521define float @v_fneg_fma_x_y_fneg_f32(float %a, float %b, float %c) #0 {
1522; GCN-SAFE-LABEL: v_fneg_fma_x_y_fneg_f32:
1523; GCN-SAFE:       ; %bb.0:
1524; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525; GCN-SAFE-NEXT:    v_fma_f32 v0, v0, v1, -v2
1526; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1527; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1528;
1529; GCN-NSZ-LABEL: v_fneg_fma_x_y_fneg_f32:
1530; GCN-NSZ:       ; %bb.0:
1531; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1532; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, v2
1533; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1534  %fneg.c = fneg float %c
1535  %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c)
1536  %fneg = fneg float %fma
1537  ret float %fneg
1538}
1539
; fneg of fma(-%a, %b, %c) where -%a is also returned as the second struct
; member. Both check sets materialize -%a with a v_xor into v3. The SAFE checks
; additionally xor the fma result; the NSZ checks fold the outer negation into
; the fma operands (v0, v1, -v2).
1540define { float, float } @v_fneg_fma_store_use_fneg_x_y_f32(float %a, float %b, float %c) #0 {
1541; GCN-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f32:
1542; GCN-SAFE:       ; %bb.0:
1543; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1544; GCN-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v0
1545; GCN-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, v2
1546; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1547; GCN-SAFE-NEXT:    v_mov_b32_e32 v1, v3
1548; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1549;
1550; GCN-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f32:
1551; GCN-NSZ:       ; %bb.0:
1552; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1553; GCN-NSZ-NEXT:    v_xor_b32_e32 v3, 0x80000000, v0
1554; GCN-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
1555; GCN-NSZ-NEXT:    v_mov_b32_e32 v1, v3
1556; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1557  %fneg.a = fneg float %a
1558  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1559  %fneg = fneg float %fma
1560  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1561  %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
1562  ret { float, float } %insert.1
1563}
1564
; fneg of fma(-%a, %b, %c) where -%a also feeds an fmul by %d. In both check
; sets the multiply consumes -v0 as a source modifier, so -%a is never
; materialized. The SAFE checks xor the fma result; the NSZ checks expect the
; outer negation folded into the fma operands (v0, v1, -v2).
1565define { float, float } @v_fneg_fma_multi_use_fneg_x_y_f32(float %a, float %b, float %c, float %d) #0 {
1566; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32:
1567; GCN-SAFE:       ; %bb.0:
1568; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1569; GCN-SAFE-NEXT:    v_fma_f32 v1, -v0, v1, v2
1570; GCN-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
1571; GCN-SAFE-NEXT:    v_mul_f32_e64 v1, -v0, v3
1572; GCN-SAFE-NEXT:    v_mov_b32_e32 v0, v2
1573; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1574;
1575; GCN-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32:
1576; GCN-NSZ:       ; %bb.0:
1577; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1578; GCN-NSZ-NEXT:    v_fma_f32 v2, v0, v1, -v2
1579; GCN-NSZ-NEXT:    v_mul_f32_e64 v1, -v0, v3
1580; GCN-NSZ-NEXT:    v_mov_b32_e32 v0, v2
1581; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1582  %fneg.a = fneg float %a
1583  %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
1584  %fneg = fneg float %fma
1585  %use1 = fmul float %fneg.a, %d
1586  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1587  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1588  ret { float, float } %insert.1
1589}
1590
1591; --------------------------------------------------------------------------------
1592; fmad tests
1593; --------------------------------------------------------------------------------
1594
; fneg of llvm.fmuladd(%a, %b, %c). The SAFE checks expect a v_mac accumulating
; into v2 followed by a sign-bit v_xor; the NSZ checks expect a single v_mad
; with -v1 and -v2 source modifiers.
1595define float @v_fneg_fmad_f32(float %a, float %b, float %c) #0 {
1596; GCN-SAFE-LABEL: v_fneg_fmad_f32:
1597; GCN-SAFE:       ; %bb.0:
1598; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1599; GCN-SAFE-NEXT:    v_mac_f32_e32 v2, v0, v1
1600; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v2
1601; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1602;
1603; GCN-NSZ-LABEL: v_fneg_fmad_f32:
1604; GCN-NSZ:       ; %bb.0:
1605; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1606; GCN-NSZ-NEXT:    v_mad_f32 v0, v0, -v1, -v2
1607; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1608  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1609  %fneg = fneg float %fma
1610  ret float %fneg
1611}
1612
; <4 x float> variant of the fmuladd negation test. The SAFE checks expect four
; v_mac instructions followed by four sign-bit v_xor instructions; the NSZ
; checks expect four v_mad instructions, each negating its second and third
; source operands.
1613define <4 x float> @v_fneg_fmad_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
1614; GCN-SAFE-LABEL: v_fneg_fmad_v4f32:
1615; GCN-SAFE:       ; %bb.0:
1616; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1617; GCN-SAFE-NEXT:    v_mac_f32_e32 v11, v3, v7
1618; GCN-SAFE-NEXT:    v_mac_f32_e32 v10, v2, v6
1619; GCN-SAFE-NEXT:    v_mac_f32_e32 v9, v1, v5
1620; GCN-SAFE-NEXT:    v_mac_f32_e32 v8, v0, v4
1621; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v8
1622; GCN-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v9
1623; GCN-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v10
1624; GCN-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v11
1625; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1626;
1627; GCN-NSZ-LABEL: v_fneg_fmad_v4f32:
1628; GCN-NSZ:       ; %bb.0:
1629; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1630; GCN-NSZ-NEXT:    v_mad_f32 v0, v0, -v4, -v8
1631; GCN-NSZ-NEXT:    v_mad_f32 v1, v1, -v5, -v9
1632; GCN-NSZ-NEXT:    v_mad_f32 v2, v2, -v6, -v10
1633; GCN-NSZ-NEXT:    v_mad_f32 v3, v3, -v7, -v11
1634; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1635  %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
1636  %fneg = fneg <4 x float> %fma
1637  ret <4 x float> %fneg
1638}
1639
; The fmuladd result is used both by the fneg and by an fmul with 4.0. The SAFE
; checks expect v_mac, a v_xor for the negated value and a multiply by 4.0 of
; the un-negated value. The NSZ checks expect the v_mad to produce the negated
; value directly and the multiply to use the constant -4.0 on that value.
1640define { float, float } @v_fneg_fmad_multi_use_fmad_f32(float %a, float %b, float %c) #0 {
1641; GCN-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f32:
1642; GCN-SAFE:       ; %bb.0:
1643; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644; GCN-SAFE-NEXT:    v_mac_f32_e32 v2, v0, v1
1645; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v2
1646; GCN-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v2
1647; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
1648;
1649; GCN-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f32:
1650; GCN-NSZ:       ; %bb.0:
1651; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1652; GCN-NSZ-NEXT:    v_mad_f32 v0, v0, -v1, -v2
1653; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1654; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
1655  %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
1656  %fneg = fneg float %fma
1657  %use1 = fmul float %fma, 4.0
1658  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1659  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
1660  ret { float, float } %insert.1
1661}
1662
1663; --------------------------------------------------------------------------------
1664; fp_extend tests
1665; --------------------------------------------------------------------------------
1666
; fneg of an fpext from float to double. The checks expect the negation to be
; folded into the conversion as a -v0 source modifier (e64 encoding).
1667define double @v_fneg_fp_extend_f32_to_f64(float %a) #0 {
1668; GCN-LABEL: v_fneg_fp_extend_f32_to_f64:
1669; GCN:       ; %bb.0:
1670; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1671; GCN-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
1672; GCN-NEXT:    s_setpc_b64 s[30:31]
1673  %fpext = fpext float %a to double
1674  %fneg = fneg double %fpext
1675  ret double %fneg
1676}
1677
; fneg of fpext(fneg %a). The checks expect the two negations to cancel,
; leaving a single unmodified conversion.
1678define double @v_fneg_fp_extend_fneg_f32_to_f64(float %a) #0 {
1679; GCN-LABEL: v_fneg_fp_extend_fneg_f32_to_f64:
1680; GCN:       ; %bb.0:
1681; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
1683; GCN-NEXT:    s_setpc_b64 s[30:31]
1684  %fneg.a = fneg float %a
1685  %fpext = fpext float %fneg.a to double
1686  %fneg = fneg double %fpext
1687  ret double %fneg
1688}
1689
; fneg of fpext(fneg %a) where -%a is also returned as the second struct
; member. The checks expect the conversion to read the un-negated input and a
; separate v_xor to produce the returned -%a.
1690define { double, float } @v_fneg_fp_extend_store_use_fneg_f32_to_f64(float %a) #0 {
1691; GCN-LABEL: v_fneg_fp_extend_store_use_fneg_f32_to_f64:
1692; GCN:       ; %bb.0:
1693; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1694; GCN-NEXT:    v_mov_b32_e32 v2, v0
1695; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
1696; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
1697; GCN-NEXT:    s_setpc_b64 s[30:31]
1698  %fneg.a = fneg float %a
1699  %fpext = fpext float %fneg.a to double
1700  %fneg = fneg double %fpext
1701  %insert.0 = insertvalue { double, float } poison, double %fneg, 0
1702  %insert.1 = insertvalue { double, float } %insert.0, float %fneg.a, 1
1703  ret { double, float } %insert.1
1704}
1705
; The fpext result is used by the fneg and is also returned un-negated. The
; checks expect one unmodified conversion plus a v_xor on the high dword (v3)
; of the converted double to form the negated copy.
1706define { double, double } @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(float %a) #0 {
1707; GCN-LABEL: v_fneg_multi_use_fp_extend_fneg_f32_to_f64:
1708; GCN:       ; %bb.0:
1709; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1710; GCN-NEXT:    v_cvt_f64_f32_e32 v[2:3], v0
1711; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
1712; GCN-NEXT:    v_mov_b32_e32 v0, v2
1713; GCN-NEXT:    s_setpc_b64 s[30:31]
1714  %fpext = fpext float %a to double
1715  %fneg = fneg double %fpext
1716  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
1717  %insert.1 = insertvalue { double, double } %insert.0, double %fpext, 1
1718  ret { double, double } %insert.1
1719}
1720
; The fpext result is used by the fneg and by an fmul with 4.0. Both the hawaii
; and fiji checks expect one unmodified conversion, a v_mul_f64 by 4.0 and a
; v_xor of the high dword; the hawaii checks route the xor through v4 and a
; trailing v_mov, while the fiji checks xor v1 in place after the multiply.
1721define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(float %a) #0 {
1722; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1723; SI:       ; %bb.0:
1724; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1725; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
1726; SI-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
1727; SI-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
1728; SI-NEXT:    v_mov_b32_e32 v1, v4
1729; SI-NEXT:    s_setpc_b64 s[30:31]
1730;
1731; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64:
1732; VI:       ; %bb.0:
1733; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1734; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
1735; VI-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
1736; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
1737; VI-NEXT:    s_setpc_b64 s[30:31]
1738  %fpext = fpext float %a to double
1739  %fneg = fneg double %fpext
1740  %mul = fmul double %fpext, 4.0
1741  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
1742  %insert.1 = insertvalue { double, double } %insert.0, double %mul, 1
1743  ret { double, double } %insert.1
1744}
1745
1746; FIXME: Source modifiers not folded for f16->f32
; The half-to-float fpext result is used by the fneg and is also returned
; un-negated. The hawaii checks contain no conversion instruction (only a
; v_mov and a v_xor); the fiji checks expect v_cvt_f32_f16 followed by a v_xor.
1747define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
1748; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1749; SI:       ; %bb.0:
1750; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1751; SI-NEXT:    v_mov_b32_e32 v1, v0
1752; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1753; SI-NEXT:    s_setpc_b64 s[30:31]
1754;
1755; VI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
1756; VI:       ; %bb.0:
1757; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1758; VI-NEXT:    v_cvt_f32_f16_e32 v1, v0
1759; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1760; VI-NEXT:    s_setpc_b64 s[30:31]
1761  %fpext = fpext half %a to float
1762  %fneg = fneg float %fpext
1763  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1764  %insert.1 = insertvalue { float, float } %insert.0, float %fpext, 1
1765  ret { float, float } %insert.1
1766}
1767
; The half-to-float fpext result is used by the fneg and by an fmul with 4.0.
; The hawaii checks expect a v_xor, a multiply by 4.0 and a v_mov with no
; conversion instruction; the fiji checks expect v_cvt_f32_f16, a v_xor and a
; multiply by 4.0 of the un-negated value.
1768define { float, float } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
1769; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1770; SI:       ; %bb.0:
1771; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1772; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
1773; SI-NEXT:    v_mul_f32_e32 v1, 4.0, v0
1774; SI-NEXT:    v_mov_b32_e32 v0, v2
1775; SI-NEXT:    s_setpc_b64 s[30:31]
1776;
1777; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32:
1778; VI:       ; %bb.0:
1779; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1780; VI-NEXT:    v_cvt_f32_f16_e32 v1, v0
1781; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1782; VI-NEXT:    v_mul_f32_e32 v1, 4.0, v1
1783; VI-NEXT:    s_setpc_b64 s[30:31]
1784  %fpext = fpext half %a to float
1785  %fneg = fneg float %fpext
1786  %mul = fmul float %fpext, 4.0
1787  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1788  %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1
1789  ret { float, float } %insert.1
1790}
1791
1792; --------------------------------------------------------------------------------
1793; fp_round tests
1794; --------------------------------------------------------------------------------
1795
; fneg of an fptrunc from double to float. The checks expect the negation to be
; folded into the conversion as a -v[0:1] source modifier (e64 encoding).
1796define float @v_fneg_fp_round_f64_to_f32(double %a) #0 {
1797; GCN-LABEL: v_fneg_fp_round_f64_to_f32:
1798; GCN:       ; %bb.0:
1799; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1800; GCN-NEXT:    v_cvt_f32_f64_e64 v0, -v[0:1]
1801; GCN-NEXT:    s_setpc_b64 s[30:31]
1802  %fpround = fptrunc double %a to float
1803  %fneg = fneg float %fpround
1804  ret float %fneg
1805}
1806
; fneg of fptrunc(fneg %a). The checks expect the two negations to cancel,
; leaving a single unmodified conversion.
1807define float @v_fneg_fp_round_fneg_f64_to_f32(double %a) #0 {
1808; GCN-LABEL: v_fneg_fp_round_fneg_f64_to_f32:
1809; GCN:       ; %bb.0:
1810; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1811; GCN-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
1812; GCN-NEXT:    s_setpc_b64 s[30:31]
1813  %fneg.a = fneg double %a
1814  %fpround = fptrunc double %fneg.a to float
1815  %fneg = fneg float %fpround
1816  ret float %fneg
1817}
1818
; fneg of fptrunc(fneg %a) where the double -%a is also returned as the second
; struct member. The checks expect the conversion to read the un-negated input,
; a v_xor on v1 (the high dword of %a) to build the returned -%a, and v_mov
; instructions arranging the return registers.
1819define { float, double } @v_fneg_fp_round_store_use_fneg_f64_to_f32(double %a) #0 {
1820; GCN-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f32:
1821; GCN:       ; %bb.0:
1822; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1823; GCN-NEXT:    v_cvt_f32_f64_e32 v3, v[0:1]
1824; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
1825; GCN-NEXT:    v_mov_b32_e32 v1, v0
1826; GCN-NEXT:    v_mov_b32_e32 v0, v3
1827; GCN-NEXT:    s_setpc_b64 s[30:31]
1828  %fneg.a = fneg double %a
1829  %fpround = fptrunc double %fneg.a to float
1830  %fneg = fneg float %fpround
1831  %insert.0 = insertvalue { float, double } poison, float %fneg, 0
1832  %insert.1 = insertvalue { float, double } %insert.0, double %fneg.a, 1
1833  ret { float, double } %insert.1
1834}
1835
; fneg of fptrunc(fneg %a) where -%a also feeds an fmul by %c. Both the hawaii
; and fiji checks expect an unmodified conversion and a v_mul_f64 that takes
; -v[0:1] as a source modifier; they differ in instruction order and in the
; registers/moves used to place the results.
1836define { float, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f32(double %a, double %c) #0 {
1837; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1838; SI:       ; %bb.0:
1839; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1840; SI-NEXT:    v_cvt_f32_f64_e32 v4, v[0:1]
1841; SI-NEXT:    v_mul_f64 v[1:2], -v[0:1], v[2:3]
1842; SI-NEXT:    v_mov_b32_e32 v0, v4
1843; SI-NEXT:    s_setpc_b64 s[30:31]
1844;
1845; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32:
1846; VI:       ; %bb.0:
1847; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1848; VI-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
1849; VI-NEXT:    v_cvt_f32_f64_e32 v0, v[0:1]
1850; VI-NEXT:    v_mov_b32_e32 v1, v2
1851; VI-NEXT:    v_mov_b32_e32 v2, v3
1852; VI-NEXT:    s_setpc_b64 s[30:31]
1853  %fneg.a = fneg double %a
1854  %fpround = fptrunc double %fneg.a to float
1855  %fneg = fneg float %fpround
1856  %use1 = fmul double %fneg.a, %c
1857  %insert.0 = insertvalue { float, double } poison, float %fneg, 0
1858  %insert.1 = insertvalue { float, double } %insert.0, double %use1, 1
1859  ret { float, double } %insert.1
1860}
1861
; fneg of an fptrunc from float to half. Both check sets expect the negation
; folded into v_cvt_f16_f32 as a -v0 source modifier; the hawaii checks
; additionally expect a v_cvt_f32_f16 applied to the result.
1862define half @v_fneg_fp_round_f32_to_f16(float %a) #0 {
1863; SI-LABEL: v_fneg_fp_round_f32_to_f16:
1864; SI:       ; %bb.0:
1865; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1866; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1867; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1868; SI-NEXT:    s_setpc_b64 s[30:31]
1869;
1870; VI-LABEL: v_fneg_fp_round_f32_to_f16:
1871; VI:       ; %bb.0:
1872; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873; VI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1874; VI-NEXT:    s_setpc_b64 s[30:31]
1875  %fpround = fptrunc float %a to half
1876  %fneg = fneg half %fpround
1877  ret half %fneg
1878}
1879
; fneg of fptrunc(fneg %a) to half. Both check sets expect the negations to
; cancel into an unmodified v_cvt_f16_f32; the hawaii checks additionally
; expect a v_cvt_f32_f16 applied to the result.
1880define half @v_fneg_fp_round_fneg_f32_to_f16(float %a) #0 {
1881; SI-LABEL: v_fneg_fp_round_fneg_f32_to_f16:
1882; SI:       ; %bb.0:
1883; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1884; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1885; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1886; SI-NEXT:    s_setpc_b64 s[30:31]
1887;
1888; VI-LABEL: v_fneg_fp_round_fneg_f32_to_f16:
1889; VI:       ; %bb.0:
1890; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1891; VI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1892; VI-NEXT:    s_setpc_b64 s[30:31]
1893  %fneg.a = fneg float %a
1894  %fpround = fptrunc float %fneg.a to half
1895  %fneg = fneg half %fpround
1896  ret half %fneg
1897}
1898
; The fptrunc result is used by the fneg and is also returned un-negated. The
; checks expect one unmodified conversion into v1 followed by a v_xor that
; writes the negated copy to v0.
1899define { float, float } @v_fneg_multi_use_fp_round_fneg_f64_to_f32(double %a) #0 {
1900; GCN-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f32:
1901; GCN:       ; %bb.0:
1902; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1903; GCN-NEXT:    v_cvt_f32_f64_e32 v1, v[0:1]
1904; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1905; GCN-NEXT:    s_setpc_b64 s[30:31]
1906  %fpround = fptrunc double %a to float
1907  %fneg = fneg float %fpround
1908  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
1909  %insert.1 = insertvalue { float, float } %insert.0, float %fpround, 1
1910  ret { float, float } %insert.1
1911}
1912
; fneg of fptrunc(fneg %a) to half where -%a is also returned as the second
; struct member. Both check sets expect an unmodified v_cvt_f16_f32 plus a
; v_xor producing the returned -%a; the hawaii checks additionally expect a
; v_cvt_f32_f16 on the converted value.
1913define { half, float } @v_fneg_fp_round_store_use_fneg_f32_to_f16(float %a) #0 {
1914; SI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16:
1915; SI:       ; %bb.0:
1916; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1917; SI-NEXT:    v_cvt_f16_f32_e32 v1, v0
1918; SI-NEXT:    v_cvt_f32_f16_e32 v2, v1
1919; SI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
1920; SI-NEXT:    v_mov_b32_e32 v0, v2
1921; SI-NEXT:    s_setpc_b64 s[30:31]
1922;
1923; VI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16:
1924; VI:       ; %bb.0:
1925; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1926; VI-NEXT:    v_cvt_f16_f32_e32 v2, v0
1927; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
1928; VI-NEXT:    v_mov_b32_e32 v0, v2
1929; VI-NEXT:    s_setpc_b64 s[30:31]
1930  %fneg.a = fneg float %a
1931  %fpround = fptrunc float %fneg.a to half
1932  %fneg = fneg half %fpround
1933  %insert.0 = insertvalue { half, float } poison, half %fneg, 0
1934  %insert.1 = insertvalue { half, float } %insert.0, float %fneg.a, 1
1935  ret { half, float } %insert.1
1936}
1937
; fneg of fptrunc(fneg %a) to half where -%a also feeds an fmul by %c. Both
; check sets expect an unmodified v_cvt_f16_f32 and a v_mul_f32 that takes -v0
; as a source modifier; the hawaii checks additionally expect a v_cvt_f32_f16
; on the converted value.
1938define { half, float } @v_fneg_fp_round_multi_use_fneg_f32_to_f16(float %a, float %c) #0 {
1939; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1940; SI:       ; %bb.0:
1941; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942; SI-NEXT:    v_cvt_f16_f32_e32 v2, v0
1943; SI-NEXT:    v_mul_f32_e64 v1, -v0, v1
1944; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1945; SI-NEXT:    v_mov_b32_e32 v0, v2
1946; SI-NEXT:    s_setpc_b64 s[30:31]
1947;
1948; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16:
1949; VI:       ; %bb.0:
1950; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951; VI-NEXT:    v_cvt_f16_f32_e32 v2, v0
1952; VI-NEXT:    v_mul_f32_e64 v1, -v0, v1
1953; VI-NEXT:    v_mov_b32_e32 v0, v2
1954; VI-NEXT:    s_setpc_b64 s[30:31]
1955  %fneg.a = fneg float %a
1956  %fpround = fptrunc float %fneg.a to half
1957  %fneg = fneg half %fpround
1958  %use1 = fmul float %fneg.a, %c
1959  %insert.0 = insertvalue { half, float } poison, half %fneg, 0
1960  %insert.1 = insertvalue { half, float } %insert.0, float %use1, 1
1961  ret { half, float } %insert.1
1962}
1963
1964; --------------------------------------------------------------------------------
1965; rcp tests
1966; --------------------------------------------------------------------------------
1967
; fneg of llvm.amdgcn.rcp(%a). The checks expect the negation to be folded into
; v_rcp_f32 as a -v0 source modifier (e64 encoding).
1968define float @v_fneg_rcp_f32(float %a) #0 {
1969; GCN-LABEL: v_fneg_rcp_f32:
1970; GCN:       ; %bb.0:
1971; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1972; GCN-NEXT:    v_rcp_f32_e64 v0, -v0
1973; GCN-NEXT:    s_setpc_b64 s[30:31]
1974  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
1975  %fneg = fneg float %rcp
1976  ret float %fneg
1977}
1978
; fneg of llvm.amdgcn.rcp(fneg %a). The checks expect the two negations to
; cancel, leaving a single unmodified v_rcp_f32.
1979define float @v_fneg_rcp_fneg_f32(float %a) #0 {
1980; GCN-LABEL: v_fneg_rcp_fneg_f32:
1981; GCN:       ; %bb.0:
1982; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1983; GCN-NEXT:    v_rcp_f32_e32 v0, v0
1984; GCN-NEXT:    s_setpc_b64 s[30:31]
1985  %fneg.a = fneg float %a
1986  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
1987  %fneg = fneg float %rcp
1988  ret float %fneg
1989}
1990
; Stores fneg(rcp(fneg %a)) to %ptr0 and fneg %a to %ptr1. The checks expect an
; unmodified v_rcp_f32 of the input, a v_xor producing -%a, and two
; flat_store_dword instructions followed by a wait before returning.
1991define void @v_fneg_rcp_store_use_fneg_f32(float %a, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) #0 {
1992; GCN-LABEL: v_fneg_rcp_store_use_fneg_f32:
1993; GCN:       ; %bb.0:
1994; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1995; GCN-NEXT:    v_rcp_f32_e32 v5, v0
1996; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1997; GCN-NEXT:    flat_store_dword v[1:2], v5
1998; GCN-NEXT:    flat_store_dword v[3:4], v0
1999; GCN-NEXT:    s_waitcnt vmcnt(0)
2000; GCN-NEXT:    s_setpc_b64 s[30:31]
2001  %fneg.a = fneg float %a
2002  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
2003  %fneg = fneg float %rcp
2004  store float %fneg, ptr addrspace(1) %ptr0
2005  store float %fneg.a, ptr addrspace(1) %ptr1
2006  ret void
2007}
2008
; fneg of rcp(fneg %a) where -%a also feeds an fmul by %c. The checks expect an
; unmodified v_rcp_f32 and a v_mul_f32 that takes -v0 as a source modifier, so
; -%a is never materialized in a register.
2009define { float, float } @v_fneg_rcp_multi_use_fneg_f32(float %a, float %c) #0 {
2010; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f32:
2011; GCN:       ; %bb.0:
2012; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2013; GCN-NEXT:    v_rcp_f32_e32 v2, v0
2014; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v1
2015; GCN-NEXT:    v_mov_b32_e32 v0, v2
2016; GCN-NEXT:    s_setpc_b64 s[30:31]
2017  %fneg.a = fneg float %a
2018  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
2019  %fneg = fneg float %rcp
2020  %use1 = fmul float %fneg.a, %c
2021  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2022  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
2023  ret { float, float } %insert.1
2024}
2025
2026; Test getNegatedExpression works for rcp nodes
; Computes %arg1 - rcp(fma(-%arg0, %arg1, -2.0)), with nsz on the fma and the
; fsub. The checks expect the subtraction to become a v_add of a reciprocal
; whose fma input has been negated (operands v0, v1, 2.0), with no explicit
; sign-bit manipulation.
2027define float @v_negated_rcp_f32(float %arg0, float %arg1) #1 {
2028; GCN-LABEL: v_negated_rcp_f32:
2029; GCN:       ; %bb.0:
2030; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2031; GCN-NEXT:    v_fma_f32 v0, v0, v1, 2.0
2032; GCN-NEXT:    v_rcp_f32_e32 v0, v0
2033; GCN-NEXT:    v_add_f32_e32 v0, v1, v0
2034; GCN-NEXT:    s_setpc_b64 s[30:31]
2035  %neg.arg0 = fneg float %arg0
2036  %fma = call nsz float @llvm.fma.f32(float %neg.arg0, float %arg1, float -2.0)
2037  %rcp0 = call float @llvm.amdgcn.rcp.f32(float %fma)
2038  %sub = fsub nsz float %arg1, %rcp0
2039  ret float %sub
2040}
2041
2042; --------------------------------------------------------------------------------
2043; fmul_legacy tests
2044; --------------------------------------------------------------------------------
2045
; fneg of llvm.amdgcn.fmul.legacy(%a, %b). The checks expect the negation to be
; folded into the multiply as a -v1 source modifier (e64 encoding).
2046define float @v_fneg_mul_legacy_f32(float %a, float %b) #0 {
2047; GCN-LABEL: v_fneg_mul_legacy_f32:
2048; GCN:       ; %bb.0:
2049; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050; GCN-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
2051; GCN-NEXT:    s_setpc_b64 s[30:31]
2052  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
2053  %fneg = fneg float %mul
2054  ret float %fneg
2055}
2056
; The fmul.legacy result is used by the fneg and is also returned un-negated.
; The checks expect an unmodified multiply into v1 followed by a v_xor writing
; the negated copy to v0.
2057define { float, float } @v_fneg_mul_legacy_store_use_mul_legacy_f32(float %a, float %b) #0 {
2058; GCN-LABEL: v_fneg_mul_legacy_store_use_mul_legacy_f32:
2059; GCN:       ; %bb.0:
2060; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2061; GCN-NEXT:    v_mul_legacy_f32_e32 v1, v0, v1
2062; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
2063; GCN-NEXT:    s_setpc_b64 s[30:31]
2064  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
2065  %fneg = fneg float %mul
2066  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2067  %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1
2068  ret { float, float } %insert.1
2069}
2070
; The fmul.legacy result is used by the fneg and by a second fmul.legacy with
; 4.0. The checks expect the first multiply to produce the negated product
; directly (-v1 modifier) and the second multiply to read it back through a
; -v0 source modifier.
2071define { float, float } @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float %a, float %b) #0 {
2072; GCN-LABEL: v_fneg_mul_legacy_multi_use_mul_legacy_f32:
2073; GCN:       ; %bb.0:
2074; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2075; GCN-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
2076; GCN-NEXT:    v_mul_legacy_f32_e64 v1, -v0, 4.0
2077; GCN-NEXT:    s_setpc_b64 s[30:31]
2078  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
2079  %fneg = fneg float %mul
2080  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
2081  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2082  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
2083  ret { float, float } %insert.1
2084}
2085
; fneg of fmul.legacy(fneg %a, %b). The checks expect the two negations to
; cancel, leaving a single unmodified multiply.
2086define float @v_fneg_mul_legacy_fneg_x_f32(float %a, float %b) #0 {
2087; GCN-LABEL: v_fneg_mul_legacy_fneg_x_f32:
2088; GCN:       ; %bb.0:
2089; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2090; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
2091; GCN-NEXT:    s_setpc_b64 s[30:31]
2092  %fneg.a = fneg float %a
2093  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2094  %fneg = fneg float %mul
2095  ret float %fneg
2096}
2097
; fneg of fmul.legacy(%a, fneg %b). The checks expect the two negations to
; cancel, leaving a single unmodified multiply.
2098define float @v_fneg_mul_legacy_x_fneg_f32(float %a, float %b) #0 {
2099; GCN-LABEL: v_fneg_mul_legacy_x_fneg_f32:
2100; GCN:       ; %bb.0:
2101; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2102; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
2103; GCN-NEXT:    s_setpc_b64 s[30:31]
2104  %fneg.b = fneg float %b
2105  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
2106  %fneg = fneg float %mul
2107  ret float %fneg
2108}
2109
; fneg of fmul.legacy(fneg %a, fneg %b). The checks expect the three negations
; to reduce to one, expressed as a -v1 source modifier on a single multiply.
2110define float @v_fneg_mul_legacy_fneg_fneg_f32(float %a, float %b) #0 {
2111; GCN-LABEL: v_fneg_mul_legacy_fneg_fneg_f32:
2112; GCN:       ; %bb.0:
2113; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2114; GCN-NEXT:    v_mul_legacy_f32_e64 v0, v0, -v1
2115; GCN-NEXT:    s_setpc_b64 s[30:31]
2116  %fneg.a = fneg float %a
2117  %fneg.b = fneg float %b
2118  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
2119  %fneg = fneg float %mul
2120  ret float %fneg
2121}
2122
; fneg of fmul.legacy(fneg %a, %b) where -%a is also returned as the second
; struct member. The checks expect a v_xor producing -%a and an unmodified
; multiply of the original inputs.
2123define { float, float } @v_fneg_mul_legacy_store_use_fneg_x_f32(float %a, float %b) #0 {
2124; GCN-LABEL: v_fneg_mul_legacy_store_use_fneg_x_f32:
2125; GCN:       ; %bb.0:
2126; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2127; GCN-NEXT:    v_xor_b32_e32 v2, 0x80000000, v0
2128; GCN-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
2129; GCN-NEXT:    v_mov_b32_e32 v1, v2
2130; GCN-NEXT:    s_setpc_b64 s[30:31]
2131  %fneg.a = fneg float %a
2132  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2133  %fneg = fneg float %mul
2134
2135  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2136  %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
2137  ret { float, float } %insert.1
2138}
2139
; fneg of fmul.legacy(fneg %a, %b) where -%a also feeds a second fmul.legacy by
; %c. The checks expect the first multiply to be unmodified and the second to
; take -v0 as a source modifier, so -%a is never materialized.
2140define { float, float } @v_fneg_mul_legacy_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
2141; GCN-LABEL: v_fneg_mul_legacy_multi_use_fneg_x_f32:
2142; GCN:       ; %bb.0:
2143; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2144; GCN-NEXT:    v_mul_legacy_f32_e32 v3, v0, v1
2145; GCN-NEXT:    v_mul_legacy_f32_e64 v1, -v0, v2
2146; GCN-NEXT:    v_mov_b32_e32 v0, v3
2147; GCN-NEXT:    s_setpc_b64 s[30:31]
2148  %fneg.a = fneg float %a
2149  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
2150  %fneg = fneg float %mul
2151  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
2152  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2153  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
2154  ret { float, float } %insert.1
2155}
2156
2157; --------------------------------------------------------------------------------
2158; sin tests
2159; --------------------------------------------------------------------------------
2160
; fneg of llvm.sin.f32(%a). The checks expect a three-instruction sequence
; (v_mul by the literal 0xbe22f983, v_fract, v_sin) with no separate sign-bit
; xor and no source modifier on v_sin.
2161define float @v_fneg_sin_f32(float %a) #0 {
2162; GCN-LABEL: v_fneg_sin_f32:
2163; GCN:       ; %bb.0:
2164; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2165; GCN-NEXT:    v_mul_f32_e32 v0, 0xbe22f983, v0
2166; GCN-NEXT:    v_fract_f32_e32 v0, v0
2167; GCN-NEXT:    v_sin_f32_e32 v0, v0
2168; GCN-NEXT:    s_setpc_b64 s[30:31]
2169  %sin = call float @llvm.sin.f32(float %a)
2170  %fneg = fneg float %sin
2171  ret float %fneg
2172}
2173
; fneg of llvm.amdgcn.sin.f32(%a). The checks expect the negation to be folded
; into v_sin_f32 as a -v0 source modifier (e64 encoding).
2174define float @v_fneg_amdgcn_sin_f32(float %a) #0 {
2175; GCN-LABEL: v_fneg_amdgcn_sin_f32:
2176; GCN:       ; %bb.0:
2177; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2178; GCN-NEXT:    v_sin_f32_e64 v0, -v0
2179; GCN-NEXT:    s_setpc_b64 s[30:31]
2180  %sin = call float @llvm.amdgcn.sin.f32(float %a)
2181  %fneg = fneg float %sin
2182  ret float %fneg
2183}
2184
2185; --------------------------------------------------------------------------------
2186; ftrunc tests
2187; --------------------------------------------------------------------------------
2188
; fneg of llvm.trunc.f32(%a). The checks expect the negation to be folded into
; v_trunc_f32 as a -v0 source modifier (e64 encoding).
2189define float @v_fneg_trunc_f32(float %a) #0 {
2190; GCN-LABEL: v_fneg_trunc_f32:
2191; GCN:       ; %bb.0:
2192; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2193; GCN-NEXT:    v_trunc_f32_e64 v0, -v0
2194; GCN-NEXT:    s_setpc_b64 s[30:31]
2195  %trunc = call float @llvm.trunc.f32(float %a)
2196  %fneg = fneg float %trunc
2197  ret float %fneg
2198}
2199
2200; --------------------------------------------------------------------------------
2201; fround tests
2202; --------------------------------------------------------------------------------
2203
; fneg of llvm.round.f32(%a). Both check sets expect the same expanded sequence
; (v_trunc, v_sub, v_cmp_ge against 0.5, v_cndmask, s_brev, v_bfi). They differ
; only at the end: the SAFE checks finish with a v_add followed by a sign-bit
; v_xor, while the NSZ checks finish with a single v_sub taking -v1.
2204define float @v_fneg_round_f32(float %a) #0 {
2205; GCN-SAFE-LABEL: v_fneg_round_f32:
2206; GCN-SAFE:       ; %bb.0:
2207; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208; GCN-SAFE-NEXT:    v_trunc_f32_e32 v1, v0
2209; GCN-SAFE-NEXT:    v_sub_f32_e32 v2, v0, v1
2210; GCN-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
2211; GCN-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
2212; GCN-SAFE-NEXT:    s_brev_b32 s4, -2
2213; GCN-SAFE-NEXT:    v_bfi_b32 v0, s4, v2, v0
2214; GCN-SAFE-NEXT:    v_add_f32_e32 v0, v1, v0
2215; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2216; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
2217;
2218; GCN-NSZ-LABEL: v_fneg_round_f32:
2219; GCN-NSZ:       ; %bb.0:
2220; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2221; GCN-NSZ-NEXT:    v_trunc_f32_e32 v1, v0
2222; GCN-NSZ-NEXT:    v_sub_f32_e32 v2, v0, v1
2223; GCN-NSZ-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
2224; GCN-NSZ-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
2225; GCN-NSZ-NEXT:    s_brev_b32 s4, -2
2226; GCN-NSZ-NEXT:    v_bfi_b32 v0, s4, v2, v0
2227; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v1, v0
2228; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
2229  %round = call float @llvm.round.f32(float %a)
2230  %fneg = fneg float %round
2231  ret float %fneg
2232}
2233
2234; --------------------------------------------------------------------------------
2235; rint tests
2236; --------------------------------------------------------------------------------
2237
; fneg of llvm.rint.f32(%a). The checks expect the negation to be folded into
; v_rndne_f32 as a -v0 source modifier (e64 encoding).
2238define float @v_fneg_rint_f32(float %a) #0 {
2239; GCN-LABEL: v_fneg_rint_f32:
2240; GCN:       ; %bb.0:
2241; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2242; GCN-NEXT:    v_rndne_f32_e64 v0, -v0
2243; GCN-NEXT:    s_setpc_b64 s[30:31]
2244  %rint = call float @llvm.rint.f32(float %a)
2245  %fneg = fneg float %rint
2246  ret float %fneg
2247}
2248
2249; --------------------------------------------------------------------------------
2250; nearbyint tests
2251; --------------------------------------------------------------------------------
2252
; fneg of llvm.nearbyint.f32(%a). The checks expect the same output as the
; rint case: a single v_rndne_f32 with a -v0 source modifier.
2253define float @v_fneg_nearbyint_f32(float %a) #0 {
2254; GCN-LABEL: v_fneg_nearbyint_f32:
2255; GCN:       ; %bb.0:
2256; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2257; GCN-NEXT:    v_rndne_f32_e64 v0, -v0
2258; GCN-NEXT:    s_setpc_b64 s[30:31]
2259  %nearbyint = call float @llvm.nearbyint.f32(float %a)
2260  %fneg = fneg float %nearbyint
2261  ret float %fneg
2262}
2263
2264; --------------------------------------------------------------------------------
2265; fcanonicalize tests
2266; --------------------------------------------------------------------------------
2267
; fneg of llvm.canonicalize.f32. The checks expect a single v_mul_f32_e32 of
; the input by -1.0, with no separate sign-flip instruction.
; NOTE(review): the intermediate value is named %trunc although it holds the
; canonicalize result; the name looks like a copy/paste leftover.
2268define float @v_fneg_canonicalize_f32(float %a) #0 {
2269; GCN-LABEL: v_fneg_canonicalize_f32:
2270; GCN:       ; %bb.0:
2271; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2272; GCN-NEXT:    v_mul_f32_e32 v0, -1.0, v0
2273; GCN-NEXT:    s_setpc_b64 s[30:31]
2274  %trunc = call float @llvm.canonicalize.f32(float %a)
2275  %fneg = fneg float %trunc
2276  ret float %fneg
2277}
2278
2279; --------------------------------------------------------------------------------
2280; arithmetic.fence tests
2281; --------------------------------------------------------------------------------
2282
; fneg of llvm.arithmetic.fence.f32 applied directly to the argument. The
; checks expect the ARITH_FENCE marker to be printed and the negation to stay
; an explicit v_xor_b32 with the sign-bit mask 0x80000000.
2283define float @v_fneg_arithmetic_fence_f32(float %a) #0 {
2284; GCN-LABEL: v_fneg_arithmetic_fence_f32:
2285; GCN:       ; %bb.0:
2286; GCN-NEXT:    ;ARITH_FENCE
2287; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2288; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2289; GCN-NEXT:    s_setpc_b64 s[30:31]
2290  %fence = call float @llvm.arithmetic.fence.f32(float %a)
2291  %fneg = fneg float %fence
2292  ret float %fneg
2293}
2294
; fmul -> llvm.arithmetic.fence -> fneg. The checks expect the multiply to be
; emitted without source modifiers, then the ARITH_FENCE marker, then a
; separate v_xor_b32; the negation is not folded into the multiply across the
; fence.
2295define float @v_fneg_arithmetic_fence_fmul_f32(float %a, float %b) #0 {
2296; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f32:
2297; GCN:       ; %bb.0:
2298; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2299; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
2300; GCN-NEXT:    ;ARITH_FENCE
2301; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2302; GCN-NEXT:    s_setpc_b64 s[30:31]
2303  %mul = fmul float %a, %b
2304  %fence = call float @llvm.arithmetic.fence.f32(float %mul)
2305  %fneg = fneg float %fence
2306  ret float %fneg
2307}
2308
2309; --------------------------------------------------------------------------------
2310; vintrp tests
2311; --------------------------------------------------------------------------------
2312
; The fneg of an fmul feeds two llvm.amdgcn.interp.p1 calls (attr0.x and
; attr0.y). The checks expect the negation as a source modifier on the
; multiply (-v1) and both interp instructions to read the multiply result
; unmodified. The SI and VI checks differ only in the _e32 suffix on the
; interp mnemonic.
2313define { float, float } @v_fneg_interp_p1_f32(float %a, float %b) #0 {
2314; SI-LABEL: v_fneg_interp_p1_f32:
2315; SI:       ; %bb.0:
2316; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2317; SI-NEXT:    v_mul_f32_e64 v1, v0, -v1
2318; SI-NEXT:    s_mov_b32 m0, 0
2319; SI-NEXT:    v_interp_p1_f32 v0, v1, attr0.x
2320; SI-NEXT:    v_interp_p1_f32 v1, v1, attr0.y
2321; SI-NEXT:    s_setpc_b64 s[30:31]
2322;
2323; VI-LABEL: v_fneg_interp_p1_f32:
2324; VI:       ; %bb.0:
2325; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2326; VI-NEXT:    v_mul_f32_e64 v1, v0, -v1
2327; VI-NEXT:    s_mov_b32 m0, 0
2328; VI-NEXT:    v_interp_p1_f32_e32 v0, v1, attr0.x
2329; VI-NEXT:    v_interp_p1_f32_e32 v1, v1, attr0.y
2330; VI-NEXT:    s_setpc_b64 s[30:31]
2331  %mul = fmul float %a, %b
2332  %fneg = fneg float %mul
2333  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
2334  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
2335  %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
2336  %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
2337  ret { float, float } %insert.1
2338}
2339
; Same shape as the interp.p1 test, using llvm.amdgcn.interp.p2 with a
; constant 4.0 first operand. The checks expect the negation as a source
; modifier on the multiply (-v1), the 4.0 constant moved into both result
; registers, and both interp instructions to read the multiply result
; unmodified. The SI and VI checks differ only in the _e32 suffix on the
; interp mnemonic.
2340define { float, float } @v_fneg_interp_p2_f32(float %a, float %b) #0 {
2341; SI-LABEL: v_fneg_interp_p2_f32:
2342; SI:       ; %bb.0:
2343; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2344; SI-NEXT:    v_mul_f32_e64 v2, v0, -v1
2345; SI-NEXT:    v_mov_b32_e32 v1, 4.0
2346; SI-NEXT:    v_mov_b32_e32 v0, 4.0
2347; SI-NEXT:    s_mov_b32 m0, 0
2348; SI-NEXT:    v_interp_p2_f32 v0, v2, attr0.x
2349; SI-NEXT:    v_interp_p2_f32 v1, v2, attr0.y
2350; SI-NEXT:    s_setpc_b64 s[30:31]
2351;
2352; VI-LABEL: v_fneg_interp_p2_f32:
2353; VI:       ; %bb.0:
2354; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2355; VI-NEXT:    v_mul_f32_e64 v2, v0, -v1
2356; VI-NEXT:    v_mov_b32_e32 v1, 4.0
2357; VI-NEXT:    v_mov_b32_e32 v0, 4.0
2358; VI-NEXT:    s_mov_b32 m0, 0
2359; VI-NEXT:    v_interp_p2_f32_e32 v0, v2, attr0.x
2360; VI-NEXT:    v_interp_p2_f32_e32 v1, v2, attr0.y
2361; VI-NEXT:    s_setpc_b64 s[30:31]
2362  %mul = fmul float %a, %b
2363  %fneg = fneg float %mul
2364  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
2365  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
2366  %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
2367  %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
2368  ret { float, float } %insert.1
2369}
2370
2371; --------------------------------------------------------------------------------
2372; CopyToReg tests
2373; --------------------------------------------------------------------------------
2374
; %mul and its fneg are defined in the entry block; the fneg is only used in
; the %if block while %mul itself is stored in %endif. The checks expect one
; unmodified v_mul_f32 before the branch and the negation applied as a source
; modifier (-v2) on the multiply inside %if. The SI and VI checks differ only
; in the 32-bit add mnemonic (v_add_i32 vs v_add_u32).
2375define void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
2376; SI-LABEL: v_fneg_copytoreg_f32:
2377; SI:       ; %bb.0:
2378; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2379; SI-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
2380; SI-NEXT:    v_lshlrev_b32_e32 v6, 2, v6
2381; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
2382; SI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2383; SI-NEXT:    v_mul_f32_e32 v2, v2, v3
2384; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
2385; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2386; SI-NEXT:    s_cbranch_execz .LBB118_2
2387; SI-NEXT:  ; %bb.1: ; %if
2388; SI-NEXT:    v_mul_f32_e64 v3, -v2, v4
2389; SI-NEXT:    flat_store_dword v[0:1], v3
2390; SI-NEXT:    s_waitcnt vmcnt(0)
2391; SI-NEXT:  .LBB118_2: ; %endif
2392; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
2393; SI-NEXT:    flat_store_dword v[0:1], v2
2394; SI-NEXT:    s_waitcnt vmcnt(0)
2395; SI-NEXT:    s_setpc_b64 s[30:31]
2396;
2397; VI-LABEL: v_fneg_copytoreg_f32:
2398; VI:       ; %bb.0:
2399; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2400; VI-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
2401; VI-NEXT:    v_lshlrev_b32_e32 v6, 2, v6
2402; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
2403; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
2404; VI-NEXT:    v_mul_f32_e32 v2, v2, v3
2405; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
2406; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
2407; VI-NEXT:    s_cbranch_execz .LBB118_2
2408; VI-NEXT:  ; %bb.1: ; %if
2409; VI-NEXT:    v_mul_f32_e64 v3, -v2, v4
2410; VI-NEXT:    flat_store_dword v[0:1], v3
2411; VI-NEXT:    s_waitcnt vmcnt(0)
2412; VI-NEXT:  .LBB118_2: ; %endif
2413; VI-NEXT:    s_or_b64 exec, exec, s[4:5]
2414; VI-NEXT:    flat_store_dword v[0:1], v2
2415; VI-NEXT:    s_waitcnt vmcnt(0)
2416; VI-NEXT:    s_setpc_b64 s[30:31]
2417  %tid = call i32 @llvm.amdgcn.workitem.id.x()
2418  %tid.ext = sext i32 %tid to i64
2419  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
2420  %mul = fmul float %a, %b
2421  %fneg = fneg float %mul
2422  %cmp0 = icmp eq i32 %d, 0
2423  br i1 %cmp0, label %if, label %endif
2424
2425if:
2426  %mul1 = fmul float %fneg, %c
2427  store volatile float %mul1, ptr addrspace(1) %out.gep
2428  br label %endif
2429
2430endif:
2431  store volatile float %mul, ptr addrspace(1) %out.gep
2432  ret void
2433}
2434
2435; --------------------------------------------------------------------------------
2436; inlineasm tests
2437; --------------------------------------------------------------------------------
2438
2439; Can't fold into use, so should fold into source
; The fneg of the fmul is both the "v" operand of the inline asm and the
; return value; %mul has no other user. The checks expect the negation as a
; source modifier on the multiply (v0, -v1), with the asm reading v0.
; NOTE(review): parameters %c and %d are not used in the body.
2440define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) #0 {
2441; GCN-LABEL: v_fneg_inlineasm_f32:
2442; GCN:       ; %bb.0:
2443; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2444; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v1
2445; GCN-NEXT:    ;;#ASMSTART
2446; GCN-NEXT:    ; use v0
2447; GCN-NEXT:    ;;#ASMEND
2448; GCN-NEXT:    s_setpc_b64 s[30:31]
2449  %mul = fmul float %a, %b
2450  %fneg = fneg float %mul
2451  call void asm sideeffect "; use $0", "v"(float %fneg) #0
2452  ret float %fneg
2453}
2454
2455; --------------------------------------------------------------------------------
2456; inlineasm tests (fneg source has multiple uses)
2457; --------------------------------------------------------------------------------
2458
2459; Can't fold into use, and the source has another use, so the fneg is not folded into the source either
; Here %mul itself is returned and only the inline asm consumes the fneg. The
; checks expect the multiply without source modifiers and a separate
; v_xor_b32 that produces the asm operand in v1.
; NOTE(review): parameters %out, %c and %d are not used in the body.
2460define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
2461; GCN-LABEL: v_fneg_inlineasm_multi_use_src_f32:
2462; GCN:       ; %bb.0:
2463; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2464; GCN-NEXT:    v_mul_f32_e32 v0, v2, v3
2465; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
2466; GCN-NEXT:    ;;#ASMSTART
2467; GCN-NEXT:    ; use v1
2468; GCN-NEXT:    ;;#ASMEND
2469; GCN-NEXT:    s_setpc_b64 s[30:31]
2470  %mul = fmul float %a, %b
2471  %fneg = fneg float %mul
2472  call void asm sideeffect "; use $0", "v"(float %fneg) #0
2473  ret float %mul
2474}
2475
2476; --------------------------------------------------------------------------------
2477; code size regression tests
2478; --------------------------------------------------------------------------------
2479
2480; There are multiple users of the fneg that must use a VOP3
2481; instruction, so there is no penalty
; Both llvm.fma.f32 calls take the same fneg of %a as their first operand. The
; checks expect -v0 as a source modifier on each v_fma_f32 and no standalone
; negate instruction.
2482define { float, float } @multiuse_fneg_2_vop3_users_f32(float %a, float %b, float %c) #0 {
2483; GCN-LABEL: multiuse_fneg_2_vop3_users_f32:
2484; GCN:       ; %bb.0:
2485; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2486; GCN-NEXT:    v_fma_f32 v3, -v0, v1, v2
2487; GCN-NEXT:    v_fma_f32 v1, -v0, v2, 2.0
2488; GCN-NEXT:    v_mov_b32_e32 v0, v3
2489; GCN-NEXT:    s_setpc_b64 s[30:31]
2490  %fneg.a = fneg float %a
2491  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
2492  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
2493  %insert.0 = insertvalue { float, float } poison, float %fma0, 0
2494  %insert.1 = insertvalue { float, float } %insert.0, float %fma1, 1
2495  ret { float, float } %insert.1
2496}
2497
2498; There are multiple users, but both require using a larger encoding
2499; for the modifier.
; Both fmuls take the same fneg of %a. The checks expect two v_mul_f32_e64
; instructions, each with -v0 as a source modifier, and no standalone negate
; instruction.
2500define { float, float } @multiuse_fneg_2_vop2_users_f32(float %a, float %b, float %c) #0 {
2501; GCN-LABEL: multiuse_fneg_2_vop2_users_f32:
2502; GCN:       ; %bb.0:
2503; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2504; GCN-NEXT:    v_mul_f32_e64 v3, -v0, v1
2505; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v2
2506; GCN-NEXT:    v_mov_b32_e32 v0, v3
2507; GCN-NEXT:    s_setpc_b64 s[30:31]
2508  %fneg.a = fneg float %a
2509  %mul0 = fmul float %fneg.a, %b
2510  %mul1 = fmul float %fneg.a, %c
2511  %insert.0 = insertvalue { float, float } poison, float %mul0, 0
2512  %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1
2513  ret { float, float } %insert.1
2514}
2515
2516; One user is VOP3 so has no cost to folding the modifier, the other does.
; One llvm.fma.f32 call and one fmul share the same fneg of %a. The checks
; expect -v0 as a source modifier on both the v_fma_f32 and the v_mul_f32_e64,
; with no standalone negate instruction.
2517define { float, float } @multiuse_fneg_vop2_vop3_users_f32(float %a, float %b, float %c) #0 {
2518; GCN-LABEL: multiuse_fneg_vop2_vop3_users_f32:
2519; GCN:       ; %bb.0:
2520; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2521; GCN-NEXT:    v_fma_f32 v3, -v0, v1, 2.0
2522; GCN-NEXT:    v_mul_f32_e64 v1, -v0, v2
2523; GCN-NEXT:    v_mov_b32_e32 v0, v3
2524; GCN-NEXT:    s_setpc_b64 s[30:31]
2525  %fneg.a = fneg float %a
2526  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
2527  %mul1 = fmul float %fneg.a, %c
2528
2529  %insert.0 = insertvalue { float, float } poison, float %fma0, 0
2530  %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1
2531  ret { float, float } %insert.1
2532}
2533
2534; The use of the fneg requires a code size increase, but folding into
2535; the source does not
; The fneg of an llvm.fma.f32 result has two fmul users. The expected output
; depends on the run: the SAFE checks keep the fma unmodified and put -v1 on
; both v_mul_f32_e64 instructions, while the NSZ checks (the RUN lines with
; -enable-no-signed-zeros-fp-math) move the negation into the fma operands
; (-v3, -2.0) so both multiplies use the _e32 form.
; NOTE(review): parameter %out is not used in the body.
2536define { float, float } @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, float %a, float %b, float %c, float %d) #0 {
2537; GCN-SAFE-LABEL: free_fold_src_code_size_cost_use_f32:
2538; GCN-SAFE:       ; %bb.0:
2539; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2540; GCN-SAFE-NEXT:    v_fma_f32 v1, v2, v3, 2.0
2541; GCN-SAFE-NEXT:    v_mul_f32_e64 v0, -v1, v4
2542; GCN-SAFE-NEXT:    v_mul_f32_e64 v1, -v1, v5
2543; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
2544;
2545; GCN-NSZ-LABEL: free_fold_src_code_size_cost_use_f32:
2546; GCN-NSZ:       ; %bb.0:
2547; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548; GCN-NSZ-NEXT:    v_fma_f32 v1, v2, -v3, -2.0
2549; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, v1, v4
2550; GCN-NSZ-NEXT:    v_mul_f32_e32 v1, v1, v5
2551; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
2552  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
2553  %fneg.fma0 = fneg float %fma0
2554  %mul1 = fmul float %fneg.fma0, %c
2555  %mul2 = fmul float %fneg.fma0, %d
2556
2557  %insert.0 = insertvalue { float, float } poison, float %mul1, 0
2558  %insert.1 = insertvalue { float, float } %insert.0, float %mul2, 1
2559  ret { float, float } %insert.1
2560}
2561
; f64 variant of the test above. All runs share one set of checks, which keep
; the v_fma_f64 unmodified and apply the negation as a source modifier
; (-v[2:3]) on both v_mul_f64 instructions.
2562define { double, double } @free_fold_src_code_size_cost_use_f64(double %a, double %b, double %c, double %d) #0 {
2563; GCN-LABEL: free_fold_src_code_size_cost_use_f64:
2564; GCN:       ; %bb.0:
2565; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2566; GCN-NEXT:    v_fma_f64 v[2:3], v[0:1], v[2:3], 2.0
2567; GCN-NEXT:    v_mul_f64 v[0:1], -v[2:3], v[4:5]
2568; GCN-NEXT:    v_mul_f64 v[2:3], -v[2:3], v[6:7]
2569; GCN-NEXT:    s_setpc_b64 s[30:31]
2570  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
2571  %fneg.fma0 = fneg double %fma0
2572  %mul1 = fmul double %fneg.fma0, %c
2573  %mul2 = fmul double %fneg.fma0, %d
2574
2575  %insert.0 = insertvalue { double, double } poison, double %mul1, 0
2576  %insert.1 = insertvalue { double, double } %insert.0, double %mul2, 1
2577  ret { double, double } %insert.1
2578}
2579
2580; %trunc.a has one fneg use, but it requires a code size increase and
2581; the fneg can instead be folded for free into the fma.
; The checks expect v_trunc_f32_e32 to stay unmodified and the negation to
; appear as a source modifier (-v0) on the v_fma_f32.
; NOTE(review): parameter %d is not used in the body.
2582define float @one_use_cost_to_fold_into_src_f32(float %a, float %b, float %c, float %d) #0 {
2583; GCN-LABEL: one_use_cost_to_fold_into_src_f32:
2584; GCN:       ; %bb.0:
2585; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2586; GCN-NEXT:    v_trunc_f32_e32 v0, v0
2587; GCN-NEXT:    v_fma_f32 v0, -v0, v1, v2
2588; GCN-NEXT:    s_setpc_b64 s[30:31]
2589  %trunc.a = call float @llvm.trunc.f32(float %a)
2590  %trunc.fneg.a = fneg float %trunc.a
2591  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2592  ret float %fma0
2593}
2594
; %trunc.a has both a negated use (the fma) and a plain use (the fmul). The
; checks expect a single unmodified v_trunc_f32_e32, -v4 as a source modifier
; on the v_fma_f32, and the plain v4 on the v_mul_f32_e32.
2595define { float, float } @multi_use_cost_to_fold_into_src(float %a, float %b, float %c, float %d) #0 {
2596; GCN-LABEL: multi_use_cost_to_fold_into_src:
2597; GCN:       ; %bb.0:
2598; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2599; GCN-NEXT:    v_trunc_f32_e32 v4, v0
2600; GCN-NEXT:    v_fma_f32 v0, -v4, v1, v2
2601; GCN-NEXT:    v_mul_f32_e32 v1, v4, v3
2602; GCN-NEXT:    s_setpc_b64 s[30:31]
2603  %trunc.a = call float @llvm.trunc.f32(float %a)
2604  %trunc.fneg.a = fneg float %trunc.a
2605  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
2606  %mul1 = fmul float %trunc.a, %d
2607  %insert.0 = insertvalue { float, float } poison, float %fma0, 0
2608  %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1
2609  ret { float, float } %insert.1
2610}
2611
2612; The AMDGPU combine to pull fneg into the FMA operands was being
2613; undone by the generic combine to pull the fneg out of the fma if
2614; !isFNegFree. We were reporting false for v2f32 even though it will
2615; be split into f32 where it will be free.
; IR shape: fast fma(%arg1, %arg2, 0) + %arg, negated, then multiplied by
; %arg2, all on <2 x float>. The checks expect, per element, a v_fma_f32 with
; a negated multiplicand, a v_sub_f32 and a v_mul_f32, with no standalone
; negate instruction.
2616define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
2617; GCN-LABEL: fneg_fma_fneg_dagcombine_loop:
2618; GCN:       ; %bb.0: ; %bb
2619; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2620; GCN-NEXT:    s_brev_b32 s4, 1
2621; GCN-NEXT:    v_fma_f32 v3, v3, -v5, s4
2622; GCN-NEXT:    v_fma_f32 v2, v2, -v4, s4
2623; GCN-NEXT:    v_sub_f32_e32 v1, v3, v1
2624; GCN-NEXT:    v_sub_f32_e32 v0, v2, v0
2625; GCN-NEXT:    v_mul_f32_e32 v0, v0, v4
2626; GCN-NEXT:    v_mul_f32_e32 v1, v1, v5
2627; GCN-NEXT:    s_setpc_b64 s[30:31]
2628bb:
2629  %i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer)
2630  %i4 = fadd fast <2 x float> %i3, %arg
2631  %i5 = fneg <2 x float> %i4
2632  %i6 = fmul fast <2 x float> %i5, %arg2
2633  ret <2 x float> %i6
2634}
2635
2636; This expects denormal flushing, so can't turn this fmul into fneg
2637; TODO: Keeping this as fmul saves encoding size
; x * -1.0 (no flags) followed by an nnan multiply by y. The checks expect a
; single v_mul_f32_e64 with -v0 as a source modifier.
; NOTE(review): a single multiply with a negated source looks at odds with
; the "can't turn this fmul into fneg" comment above; confirm that comment is
; still current.
; NOTE(review): %add holds the result of an fmul, not an fadd.
2638define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 {
2639; GCN-LABEL: nnan_fmul_neg1_to_fneg:
2640; GCN:       ; %bb.0:
2641; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2642; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
2643; GCN-NEXT:    s_setpc_b64 s[30:31]
2644  %mul = fmul float %x, -1.0
2645  %add = fmul nnan float %mul, %y
2646  ret float %add
2647}
2648
2649; It's legal to turn this fmul into an fneg since denormals are
2650; preserved and we know an snan can't happen from the flag.
; nnan x * -1.0 followed by a plain multiply by y. Unlike the preceding test,
; this function carries no #0 attribute group. The checks expect a single
; v_mul_f32_e64 with -v0 as a source modifier.
; NOTE(review): %add holds the result of an fmul, not an fadd.
2651define float @denormal_fmul_neg1_to_fneg(float %x, float %y) {
2652; GCN-LABEL: denormal_fmul_neg1_to_fneg:
2653; GCN:       ; %bb.0:
2654; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2655; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
2656; GCN-NEXT:    s_setpc_b64 s[30:31]
2657  %mul = fmul nnan float %x, -1.0
2658  %add = fmul float %mul, %y
2659  ret float %add
2660}
2661
2662; We know the source can't be an snan since it is produced by an fmul.
; (x * x) * -1.0 * y with no fast-math flags and no #0 attribute group. The
; checks expect the negation folded into the first multiply as a source
; modifier (v0, -v0), followed by a plain v_mul_f32_e32 by y.
; NOTE(review): %add holds the result of an fmul, not an fadd.
2663define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) {
2664; GCN-LABEL: denorm_snan_fmul_neg1_to_fneg:
2665; GCN:       ; %bb.0:
2666; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667; GCN-NEXT:    v_mul_f32_e64 v0, v0, -v0
2668; GCN-NEXT:    v_mul_f32_e32 v0, v0, v1
2669; GCN-NEXT:    s_setpc_b64 s[30:31]
2670  %canonical = fmul float %x, %x
2671  %mul = fmul float %canonical, -1.0
2672  %add = fmul float %mul, %y
2673  ret float %add
2674}
2675
; canonicalize(x) * -1.0 * y. The checks expect a v_mul_f32_e32 by 1.0 for the
; canonicalize, then a v_mul_f32_e64 by y with -v0 as a source modifier.
; NOTE(review): %add holds the result of an fmul, not an fadd.
2676define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 {
2677; GCN-LABEL: flush_snan_fmul_neg1_to_fneg:
2678; GCN:       ; %bb.0:
2679; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2680; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
2681; GCN-NEXT:    v_mul_f32_e64 v0, -v0, v1
2682; GCN-NEXT:    s_setpc_b64 s[30:31]
2683  %quiet = call float @llvm.canonicalize.f32(float %x)
2684  %mul = fmul float %quiet, -1.0
2685  %add = fmul float %mul, %y
2686  ret float %add
2687}
2688
; Selects between fneg %x and fneg %y, then adds %z. The checks expect the
; v_cndmask_b32 to pick between the un-negated inputs and the add to become a
; v_sub_f32 (z - selected), with no negate instruction.
2689define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) {
2690; GCN-LABEL: fadd_select_fneg_fneg_f32:
2691; GCN:       ; %bb.0:
2692; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2693; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2694; GCN-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2695; GCN-NEXT:    v_sub_f32_e32 v0, v3, v0
2696; GCN-NEXT:    s_setpc_b64 s[30:31]
2697  %cmp = icmp eq i32 %arg0, 0
2698  %neg.x = fneg float %x
2699  %neg.y  = fneg float %y
2700  %select = select i1 %cmp, float %neg.x, float %neg.y
2701  %add = fadd float %select, %z
2702  ret float %add
2703}
2704
; f64 variant of the test above. The checks expect two v_cndmask_b32 (one per
; 32-bit half) selecting between the un-negated inputs, and a v_add_f64 whose
; second source carries the negation (-v[1:2]).
2705define double @fadd_select_fneg_fneg_f64(i32 %arg0, double %x, double %y, double %z) {
2706; GCN-LABEL: fadd_select_fneg_fneg_f64:
2707; GCN:       ; %bb.0:
2708; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2709; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2710; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
2711; GCN-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
2712; GCN-NEXT:    v_add_f64 v[0:1], v[5:6], -v[1:2]
2713; GCN-NEXT:    s_setpc_b64 s[30:31]
2714  %cmp = icmp eq i32 %arg0, 0
2715  %neg.x = fneg double %x
2716  %neg.y  = fneg double %y
2717  %select = select i1 %cmp, double %neg.x, double %neg.y
2718  %add = fadd double %select, %z
2719  ret double %add
2720}
2721
; f16 variant of the select/fadd test. The SI checks round-trip each input
; through v_cvt_f16_f32 / v_cvt_f32_f16 and subtract with v_sub_f32; the VI
; checks select and subtract directly with v_sub_f16. Neither run expects a
; negate instruction.
2722define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
2723; SI-LABEL: fadd_select_fneg_fneg_f16:
2724; SI:       ; %bb.0:
2725; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2726; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2727; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2728; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2729; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2730; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2731; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2732; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
2733; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2734; SI-NEXT:    v_sub_f32_e32 v0, v3, v0
2735; SI-NEXT:    s_setpc_b64 s[30:31]
2736;
2737; VI-LABEL: fadd_select_fneg_fneg_f16:
2738; VI:       ; %bb.0:
2739; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2740; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2741; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2742; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
2743; VI-NEXT:    s_setpc_b64 s[30:31]
2744  %cmp = icmp eq i32 %arg0, 0
2745  %neg.x = fneg half %x
2746  %neg.y = fneg half %y
2747  %select = select i1 %cmp, half %neg.x, half %neg.y
2748  %add = fadd half %select, %z
2749  ret half %add
2750}
2751
2752; FIXME: Terrible code for SI
; <2 x half> variant of the select/fadd test. The SI checks pack the halves
; with shifts and ors before a single v_cndmask_b32, then unpack, convert to
; f32 and subtract per element. The VI checks use one v_cndmask_b32 followed
; by v_sub_f16_sdwa for the high half, v_sub_f16 for the low half, and a
; v_or_b32 to combine them.
2753define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
2754; SI-LABEL: fadd_select_fneg_fneg_v2f16:
2755; SI:       ; %bb.0:
2756; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2757; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
2758; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
2759; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
2760; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
2761; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2762; SI-NEXT:    v_or_b32_e32 v1, v1, v2
2763; SI-NEXT:    v_cvt_f16_f32_e32 v2, v4
2764; SI-NEXT:    v_cvt_f16_f32_e32 v4, v6
2765; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2766; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
2767; SI-NEXT:    v_or_b32_e32 v2, v3, v2
2768; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2769; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
2770; SI-NEXT:    v_cvt_f32_f16_e32 v3, v4
2771; SI-NEXT:    v_cvt_f32_f16_e32 v4, v5
2772; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2773; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2774; SI-NEXT:    v_sub_f32_e32 v0, v4, v0
2775; SI-NEXT:    v_sub_f32_e32 v1, v3, v1
2776; SI-NEXT:    s_setpc_b64 s[30:31]
2777;
2778; VI-LABEL: fadd_select_fneg_fneg_v2f16:
2779; VI:       ; %bb.0:
2780; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2781; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
2782; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
2783; VI-NEXT:    v_sub_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
2784; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
2785; VI-NEXT:    v_or_b32_e32 v0, v0, v1
2786; VI-NEXT:    s_setpc_b64 s[30:31]
2787  %cmp = icmp eq i32 %arg0, 0
2788  %neg.x = fneg <2 x half> %x
2789  %neg.y = fneg <2 x half> %y
2790  %select = select i1 %cmp, <2 x half> %neg.x, <2 x half> %neg.y
2791  %add = fadd <2 x half> %select, %z
2792  ret <2 x half> %add
2793}
2794
2795; --------------------------------------------------------------------------------
2796; select tests
2797; --------------------------------------------------------------------------------
2798
; Kernel form of the select/fneg/select pattern: select(%arg1, 0.0, %arg) is
; negated and fed to a second select on the same condition, and the result is
; stored to %ptr. The checks expect two v_cndmask_b32_e64, the second reading
; -v0. The SI and VI checks differ only in the kernel-argument load offset
; (0x9 vs 0x24).
2799define amdgpu_kernel void @s_fneg_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
2800; SI-LABEL: s_fneg_select_infloop_regression_f32:
2801; SI:       ; %bb.0:
2802; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
2803; SI-NEXT:    s_waitcnt lgkmcnt(0)
2804; SI-NEXT:    s_bitcmp1_b32 s1, 0
2805; SI-NEXT:    v_mov_b32_e32 v0, s0
2806; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
2807; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
2808; SI-NEXT:    v_cndmask_b32_e64 v2, -v0, 0, s[0:1]
2809; SI-NEXT:    v_mov_b32_e32 v0, s2
2810; SI-NEXT:    v_mov_b32_e32 v1, s3
2811; SI-NEXT:    flat_store_dword v[0:1], v2
2812; SI-NEXT:    s_endpgm
2813;
2814; VI-LABEL: s_fneg_select_infloop_regression_f32:
2815; VI:       ; %bb.0:
2816; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
2817; VI-NEXT:    s_waitcnt lgkmcnt(0)
2818; VI-NEXT:    s_bitcmp1_b32 s1, 0
2819; VI-NEXT:    v_mov_b32_e32 v0, s0
2820; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
2821; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
2822; VI-NEXT:    v_cndmask_b32_e64 v2, -v0, 0, s[0:1]
2823; VI-NEXT:    v_mov_b32_e32 v0, s2
2824; VI-NEXT:    v_mov_b32_e32 v1, s3
2825; VI-NEXT:    flat_store_dword v[0:1], v2
2826; VI-NEXT:    s_endpgm
2827  %i = select i1 %arg1, float 0.0, float %arg
2828  %i2 = fneg float %i
2829  %i3 = select i1 %arg1, float 0.0, float %i2
2830  store float %i3, ptr addrspace(1) %ptr, align 4
2831  ret void
2832}
2833
; Function (non-kernel) form of the select/fneg/select pattern with 0.0 as the
; true operand of both selects. The checks expect two v_cndmask_b32_e64 on the
; same condition, the second reading -v0.
2834define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) {
2835; GCN-LABEL: v_fneg_select_infloop_regression_f32:
2836; GCN:       ; %bb.0:
2837; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2838; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2839; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2840; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
2841; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
2842; GCN-NEXT:    s_setpc_b64 s[30:31]
2843  %i = select i1 %arg1, float 0.0, float %arg
2844  %i2 = fneg float %i
2845  %i3 = select i1 %arg1, float 0.0, float %i2
2846  ret float %i3
2847}
2848
; Variant with the operands of the first select swapped (%arg is the true
; value, 0.0 the false value); the second select is unchanged. The checks
; still expect two v_cndmask_b32, the second reading -v0.
2849define float @v_fneg_select_infloop_regression_f32_commute0(float %arg, i1 %arg1) {
2850; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute0:
2851; GCN:       ; %bb.0:
2852; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2853; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2854; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2855; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
2856; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
2857; GCN-NEXT:    s_setpc_b64 s[30:31]
2858  %i = select i1 %arg1, float %arg, float 0.0
2859  %i2 = fneg float %i
2860  %i3 = select i1 %arg1, float 0.0, float %i2
2861  ret float %i3
2862}
2863
; Variant with the operands of the second select swapped (the fneg is the
; true value, 0.0 the false value); the first select is unchanged. The checks
; expect two v_cndmask_b32_e64, the second with -v0 as its second source.
2864define float @v_fneg_select_infloop_regression_f32_commute1(float %arg, i1 %arg1) {
2865; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute1:
2866; GCN:       ; %bb.0:
2867; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2868; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2869; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2870; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
2871; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -v0, vcc
2872; GCN-NEXT:    s_setpc_b64 s[30:31]
2873  %i = select i1 %arg1, float 0.0, float %arg
2874  %i2 = fneg float %i
2875  %i3 = select i1 %arg1, float %i2, float 0.0
2876  ret float %i3
2877}
2878
; Variant with the operands of both selects swapped relative to the base test.
; The checks expect two v_cndmask_b32, the second with -v0 as its second
; source.
2879define float @v_fneg_select_infloop_regression_f32_commute2(float %arg, i1 %arg1) {
2880; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute2:
2881; GCN:       ; %bb.0:
2882; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2883; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2884; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2885; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
2886; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, -v0, vcc
2887; GCN-NEXT:    s_setpc_b64 s[30:31]
2888  %i = select i1 %arg1, float %arg, float 0.0
2889  %i2 = fneg float %i
2890  %i3 = select i1 %arg1, float %i2, float 0.0
2891  ret float %i3
2892}
2893
2894; Check with an inline constant that's equally cheap to negate
; Same select/fneg/select pattern with the constant 2.0 in place of 0.0. The
; checks expect 2.0 to appear un-negated in both v_cndmask_b32_e64 and the
; second one to read -v0.
2895define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %arg1) {
2896; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32:
2897; GCN:       ; %bb.0:
2898; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2899; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2900; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2901; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 2.0, vcc
2902; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 2.0, vcc
2903; GCN-NEXT:    s_setpc_b64 s[30:31]
2904  %i = select i1 %arg1, float 2.0, float %arg
2905  %i2 = fneg float %i
2906  %i3 = select i1 %arg1, float 2.0, float %i2
2907  ret float %i3
2908}
2909
; 2.0 variant with the operands of the first select swapped. The checks expect
; 2.0 un-negated in both v_cndmask_b32 and the second one to read -v0.
2910define float @v_fneg_select_infloop_regression_inline_imm_f32_commute0(float %arg, i1 %arg1) {
2911; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute0:
2912; GCN:       ; %bb.0:
2913; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2914; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2915; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2916; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
2917; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, 2.0, vcc
2918; GCN-NEXT:    s_setpc_b64 s[30:31]
2919  %i = select i1 %arg1, float %arg, float 2.0
2920  %i2 = fneg float %i
2921  %i3 = select i1 %arg1, float 2.0, float %i2
2922  ret float %i3
2923}
2924
; 2.0 variant with the operands of the second select swapped. The checks
; expect 2.0 un-negated in both v_cndmask_b32_e64 and -v0 as the second source
; of the second one.
2925define float @v_fneg_select_infloop_regression_inline_imm_f32_commute1(float %arg, i1 %arg1) {
2926; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute1:
2927; GCN:       ; %bb.0:
2928; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2929; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2930; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2931; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 2.0, vcc
2932; GCN-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v0, vcc
2933; GCN-NEXT:    s_setpc_b64 s[30:31]
2934  %i = select i1 %arg1, float 2.0, float %arg
2935  %i2 = fneg float %i
2936  %i3 = select i1 %arg1, float %i2, float 2.0
2937  ret float %i3
2938}
2939
; 2.0 variant with the operands of both selects swapped. The checks expect 2.0
; un-negated in both v_cndmask_b32 and -v0 as the second source of the second
; one.
2940define float @v_fneg_select_infloop_regression_inline_imm_f32_commute2(float %arg, i1 %arg1) {
2941; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute2:
2942; GCN:       ; %bb.0:
2943; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2944; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2945; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2946; GCN-NEXT:    v_cndmask_b32_e32 v0, 2.0, v0, vcc
2947; GCN-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v0, vcc
2948; GCN-NEXT:    s_setpc_b64 s[30:31]
2949  %i = select i1 %arg1, float %arg, float 2.0
2950  %i2 = fneg float %i
2951  %i3 = select i1 %arg1, float %i2, float 2.0
2952  ret float %i3
2953}
2954
2955; Check with an inline constant that's equally cheap to negate
; Same select/fneg/select pattern with the constant -2.0. The checks expect
; -2.0 to appear unchanged in both v_cndmask_b32_e64 and the second one to
; read -v0.
2956define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1 %arg1) {
2957; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32:
2958; GCN:       ; %bb.0:
2959; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2960; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2961; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2962; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, -2.0, vcc
2963; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, -2.0, vcc
2964; GCN-NEXT:    s_setpc_b64 s[30:31]
2965  %i = select i1 %arg1, float -2.0, float %arg
2966  %i2 = fneg float %i
2967  %i3 = select i1 %arg1, float -2.0, float %i2
2968  ret float %i3
2969}
2970
; -2.0 variant with the operands of the first select swapped. The checks
; expect -2.0 unchanged in both v_cndmask_b32 and the second one to read -v0.
2971define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0(float %arg, i1 %arg1) {
2972; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0:
2973; GCN:       ; %bb.0:
2974; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2975; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2976; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2977; GCN-NEXT:    v_cndmask_b32_e32 v0, -2.0, v0, vcc
2978; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, -2.0, vcc
2979; GCN-NEXT:    s_setpc_b64 s[30:31]
2980  %i = select i1 %arg1, float %arg, float -2.0
2981  %i2 = fneg float %i
2982  %i3 = select i1 %arg1, float -2.0, float %i2
2983  ret float %i3
2984}
2985
; -2.0 variant with the operands of the second select swapped. The checks
; expect -2.0 unchanged in both v_cndmask_b32_e64 and -v0 as the second source
; of the second one.
2986define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1(float %arg, i1 %arg1) {
2987; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1:
2988; GCN:       ; %bb.0:
2989; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2990; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
2991; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
2992; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, -2.0, vcc
2993; GCN-NEXT:    v_cndmask_b32_e64 v0, -2.0, -v0, vcc
2994; GCN-NEXT:    s_setpc_b64 s[30:31]
2995  %i = select i1 %arg1, float -2.0, float %arg
2996  %i2 = fneg float %i
2997  %i3 = select i1 %arg1, float %i2, float -2.0
2998  ret float %i3
2999}
3000
; -2.0 variant with the operands of both selects swapped. The checks expect
; -2.0 unchanged in both v_cndmask_b32 and -v0 as the second source of the
; second one.
3001define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2(float %arg, i1 %arg1) {
3002; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2:
3003; GCN:       ; %bb.0:
3004; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3005; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
3006; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3007; GCN-NEXT:    v_cndmask_b32_e32 v0, -2.0, v0, vcc
3008; GCN-NEXT:    v_cndmask_b32_e64 v0, -2.0, -v0, vcc
3009; GCN-NEXT:    s_setpc_b64 s[30:31]
3010  %i = select i1 %arg1, float %arg, float -2.0
3011  %i2 = fneg float %i
3012  %i3 = select i1 %arg1, float %i2, float -2.0
3013  ret float %i3
3014}
3015
; Kernel (amdgpu_kernel) f64 variant of the select / fneg / select pattern:
; select(%arg1, 0.0, fneg(select(%arg1, 0.0, %arg))), with the result stored
; to %ptr. Note the store is only align 4 although it writes a double. In the
; expected output the inputs arrive through s_load_* kernel-argument loads,
; the fneg shows up as a -v1 source modifier on the high dword only, and
; v_bfrev_b32 of 1 supplies the sign-bit constant. The low dword is selected
; with a scalar s_cselect_b32. Presumably a combiner infinite-loop regression
; test, going by the function name.
3016define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3017; SI-LABEL: s_fneg_select_infloop_regression_f64:
3018; SI:       ; %bb.0:
3019; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
3020; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
3021; SI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0xd
3022; SI-NEXT:    v_bfrev_b32_e32 v0, 1
3023; SI-NEXT:    s_waitcnt lgkmcnt(0)
3024; SI-NEXT:    s_bitcmp1_b32 s6, 0
3025; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
3026; SI-NEXT:    v_mov_b32_e32 v1, s1
3027; SI-NEXT:    s_and_b64 s[6:7], s[4:5], exec
3028; SI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
3029; SI-NEXT:    s_cselect_b32 s0, 0, s0
3030; SI-NEXT:    v_mov_b32_e32 v2, s2
3031; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[4:5]
3032; SI-NEXT:    v_mov_b32_e32 v0, s0
3033; SI-NEXT:    v_mov_b32_e32 v3, s3
3034; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3035; SI-NEXT:    s_endpgm
3036;
3037; VI-LABEL: s_fneg_select_infloop_regression_f64:
3038; VI:       ; %bb.0:
3039; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
3040; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
3041; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x34
3042; VI-NEXT:    v_bfrev_b32_e32 v0, 1
3043; VI-NEXT:    s_waitcnt lgkmcnt(0)
3044; VI-NEXT:    s_bitcmp1_b32 s6, 0
3045; VI-NEXT:    s_cselect_b64 s[4:5], -1, 0
3046; VI-NEXT:    v_mov_b32_e32 v1, s1
3047; VI-NEXT:    s_and_b64 s[6:7], s[4:5], exec
3048; VI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[4:5]
3049; VI-NEXT:    s_cselect_b32 s0, 0, s0
3050; VI-NEXT:    v_mov_b32_e32 v2, s2
3051; VI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[4:5]
3052; VI-NEXT:    v_mov_b32_e32 v0, s0
3053; VI-NEXT:    v_mov_b32_e32 v3, s3
3054; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3055; VI-NEXT:    s_endpgm
3056  %i = select i1 %arg1, double 0.0, double %arg
3057  %i2 = fneg double %i
3058  %i3 = select i1 %arg1, double 0.0, double %i2
3059  store double %i3, ptr addrspace(1) %ptr, align 4
3060  ret void
3061}
3062
; Non-kernel f64 variant: returns
; select(%arg1, 0.0, fneg(select(%arg1, 0.0, %arg))). In the expected output
; the fneg is applied only to the high dword (-v1 source modifier, with
; v_bfrev_b32 of 1 as the sign-bit constant for the other arm), followed by a
; per-dword v_cndmask against 0. Presumably a combiner infinite-loop
; regression test, going by the function name.
3063define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) {
3064; GCN-LABEL: v_fneg_select_infloop_regression_f64:
3065; GCN:       ; %bb.0:
3066; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3067; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
3068; GCN-NEXT:    v_bfrev_b32_e32 v3, 1
3069; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
3070; GCN-NEXT:    v_cndmask_b32_e64 v1, -v1, v3, vcc
3071; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3072; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
3073; GCN-NEXT:    s_setpc_b64 s[30:31]
3074  %i = select i1 %arg1, double 0.0, double %arg
3075  %i2 = fneg double %i
3076  %i3 = select i1 %arg1, double 0.0, double %i2
3077  ret double %i3
3078}
3079
; Kernel f16 variant: stores
; select(%arg1, 0.0, fneg(select(%arg1, 0.0, %arg))) to %ptr. The two targets
; are expected to differ: the SI checks convert the half to f32
; (v_cvt_f32_f16), use a -v0 source modifier for the fneg and convert back,
; while the VI checks keep the 16-bit value and negate it with
; v_xor_b32 0x8000. Presumably a combiner infinite-loop regression test,
; going by the function name.
3080define amdgpu_kernel void @s_fneg_select_infloop_regression_f16(half %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3081; SI-LABEL: s_fneg_select_infloop_regression_f16:
3082; SI:       ; %bb.0:
3083; SI-NEXT:    s_load_dword s2, s[4:5], 0x9
3084; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0xb
3085; SI-NEXT:    s_waitcnt lgkmcnt(0)
3086; SI-NEXT:    v_cvt_f32_f16_e32 v0, s2
3087; SI-NEXT:    s_bitcmp1_b32 s2, 16
3088; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3089; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[2:3]
3090; SI-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, s[2:3]
3091; SI-NEXT:    v_cvt_f16_f32_e32 v2, v0
3092; SI-NEXT:    v_mov_b32_e32 v0, s0
3093; SI-NEXT:    v_mov_b32_e32 v1, s1
3094; SI-NEXT:    flat_store_short v[0:1], v2
3095; SI-NEXT:    s_endpgm
3096;
3097; VI-LABEL: s_fneg_select_infloop_regression_f16:
3098; VI:       ; %bb.0:
3099; VI-NEXT:    s_load_dword s2, s[4:5], 0x24
3100; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2c
3101; VI-NEXT:    s_waitcnt lgkmcnt(0)
3102; VI-NEXT:    s_bitcmp1_b32 s2, 16
3103; VI-NEXT:    v_mov_b32_e32 v0, s2
3104; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3105; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[2:3]
3106; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
3107; VI-NEXT:    v_cndmask_b32_e64 v2, v0, 0, s[2:3]
3108; VI-NEXT:    v_mov_b32_e32 v0, s0
3109; VI-NEXT:    v_mov_b32_e32 v1, s1
3110; VI-NEXT:    flat_store_short v[0:1], v2
3111; VI-NEXT:    s_endpgm
3112  %i = select i1 %arg1, half 0.0, half %arg
3113  %i2 = fneg half %i
3114  %i3 = select i1 %arg1, half 0.0, half %i2
3115  store half %i3, ptr addrspace(1) %ptr, align 4
3116  ret void
3117}
3118
; Non-kernel f16 variant: returns
; select(%arg1, 0.0, fneg(select(%arg1, 0.0, %arg))). The SI checks round-trip
; the argument through v_cvt_f16_f32 / v_cvt_f32_f16 and fold the fneg into a
; -v0 source modifier; the VI checks negate the 16-bit value with
; v_xor_b32 0x8000 between the two v_cndmask instructions. Presumably a
; combiner infinite-loop regression test, going by the function name.
3119define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) {
3120; SI-LABEL: v_fneg_select_infloop_regression_f16:
3121; SI:       ; %bb.0:
3122; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3123; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
3124; SI-NEXT:    v_and_b32_e32 v1, 1, v1
3125; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3126; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3127; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3128; SI-NEXT:    v_cndmask_b32_e64 v0, -v0, 0, vcc
3129; SI-NEXT:    s_setpc_b64 s[30:31]
3130;
3131; VI-LABEL: v_fneg_select_infloop_regression_f16:
3132; VI:       ; %bb.0:
3133; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3134; VI-NEXT:    v_and_b32_e32 v1, 1, v1
3135; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3136; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3137; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
3138; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3139; VI-NEXT:    s_setpc_b64 s[30:31]
3140  %i = select i1 %arg1, half 0.0, half %arg
3141  %i2 = fneg half %i
3142  %i3 = select i1 %arg1, half 0.0, half %i2
3143  ret half %i3
3144}
3145
; Kernel <2 x half> variant: stores
; select(%arg1, zeroinitializer, fneg(select(%arg1, zeroinitializer, %arg)))
; to %ptr. A single scalar i1 condition selects the whole vector. Both
; targets are expected to stay on the scalar unit: s_cselect_b32 for the
; selects and s_xor_b32 with 0x80008000 to flip the sign bit of both halves.
; Presumably a combiner infinite-loop regression test, going by the function
; name.
3146define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3147; SI-LABEL: s_fneg_select_infloop_regression_v2f16:
3148; SI:       ; %bb.0:
3149; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
3150; SI-NEXT:    s_waitcnt lgkmcnt(0)
3151; SI-NEXT:    s_and_b32 s1, 1, s1
3152; SI-NEXT:    s_cselect_b32 s0, 0, s0
3153; SI-NEXT:    s_xor_b32 s0, s0, 0x80008000
3154; SI-NEXT:    s_cmp_eq_u32 s1, 1
3155; SI-NEXT:    s_cselect_b32 s0, 0, s0
3156; SI-NEXT:    v_mov_b32_e32 v0, s2
3157; SI-NEXT:    v_mov_b32_e32 v1, s3
3158; SI-NEXT:    v_mov_b32_e32 v2, s0
3159; SI-NEXT:    flat_store_dword v[0:1], v2
3160; SI-NEXT:    s_endpgm
3161;
3162; VI-LABEL: s_fneg_select_infloop_regression_v2f16:
3163; VI:       ; %bb.0:
3164; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
3165; VI-NEXT:    s_waitcnt lgkmcnt(0)
3166; VI-NEXT:    s_and_b32 s1, 1, s1
3167; VI-NEXT:    s_cselect_b32 s0, 0, s0
3168; VI-NEXT:    s_xor_b32 s0, s0, 0x80008000
3169; VI-NEXT:    s_cmp_eq_u32 s1, 1
3170; VI-NEXT:    s_cselect_b32 s0, 0, s0
3171; VI-NEXT:    v_mov_b32_e32 v0, s2
3172; VI-NEXT:    v_mov_b32_e32 v1, s3
3173; VI-NEXT:    v_mov_b32_e32 v2, s0
3174; VI-NEXT:    flat_store_dword v[0:1], v2
3175; VI-NEXT:    s_endpgm
3176  %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
3177  %i2 = fneg <2 x half> %i
3178  %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2
3179  store <2 x half> %i3, ptr addrspace(1) %ptr, align 4
3180  ret void
3181}
3182
; Non-kernel <2 x half> variant: returns
; select(%arg1, zeroinitializer, fneg(select(%arg1, zeroinitializer, %arg))).
; Both targets negate with v_xor_b32 0x80008000 between two v_cndmask
; instructions. The SI checks additionally pack the two halves into one
; register before the selects (v_cvt_f16_f32, shift, or) and unpack them
; again afterwards (v_cvt_f32_f16, shift). Presumably a combiner
; infinite-loop regression test, going by the function name.
3183define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1) {
3184; SI-LABEL: v_fneg_select_infloop_regression_v2f16:
3185; SI:       ; %bb.0:
3186; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3187; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
3188; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
3189; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
3190; SI-NEXT:    v_or_b32_e32 v0, v0, v1
3191; SI-NEXT:    v_and_b32_e32 v1, 1, v2
3192; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3193; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3194; SI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
3195; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, vcc
3196; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
3197; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3198; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
3199; SI-NEXT:    s_setpc_b64 s[30:31]
3200;
3201; VI-LABEL: v_fneg_select_infloop_regression_v2f16:
3202; VI:       ; %bb.0:
3203; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3204; VI-NEXT:    v_and_b32_e32 v1, 1, v1
3205; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3206; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3207; VI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
3208; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3209; VI-NEXT:    s_setpc_b64 s[30:31]
3210  %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg
3211  %i2 = fneg <2 x half> %i
3212  %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2
3213  ret <2 x half> %i3
3214}
3215
; Kernel <2 x float> variant: stores
; select(%arg1, zeroinitializer, fneg(select(%arg1, zeroinitializer, %arg)))
; to %ptr. A single scalar i1 condition selects the whole vector. Each
; element is expected to get its own pair of v_cndmask instructions: the
; first uses a negated source (-v1) against the sign-bit constant produced by
; v_bfrev_b32 of 1, the second selects against 0. Presumably a combiner
; infinite-loop regression test, going by the function name.
3216define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3217; SI-LABEL: s_fneg_select_infloop_regression_v2f32:
3218; SI:       ; %bb.0:
3219; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
3220; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
3221; SI-NEXT:    v_bfrev_b32_e32 v0, 1
3222; SI-NEXT:    s_waitcnt lgkmcnt(0)
3223; SI-NEXT:    s_bitcmp1_b32 s2, 0
3224; SI-NEXT:    v_mov_b32_e32 v1, s0
3225; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3226; SI-NEXT:    v_cndmask_b32_e64 v2, -v1, v0, s[2:3]
3227; SI-NEXT:    v_mov_b32_e32 v1, s1
3228; SI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[2:3]
3229; SI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[2:3]
3230; SI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, s[2:3]
3231; SI-NEXT:    v_mov_b32_e32 v2, s4
3232; SI-NEXT:    v_mov_b32_e32 v3, s5
3233; SI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3234; SI-NEXT:    s_endpgm
3235;
3236; VI-LABEL: s_fneg_select_infloop_regression_v2f32:
3237; VI:       ; %bb.0:
3238; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
3239; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
3240; VI-NEXT:    v_bfrev_b32_e32 v0, 1
3241; VI-NEXT:    s_waitcnt lgkmcnt(0)
3242; VI-NEXT:    s_bitcmp1_b32 s2, 0
3243; VI-NEXT:    v_mov_b32_e32 v1, s0
3244; VI-NEXT:    s_cselect_b64 s[2:3], -1, 0
3245; VI-NEXT:    v_cndmask_b32_e64 v2, -v1, v0, s[2:3]
3246; VI-NEXT:    v_mov_b32_e32 v1, s1
3247; VI-NEXT:    v_cndmask_b32_e64 v0, -v1, v0, s[2:3]
3248; VI-NEXT:    v_cndmask_b32_e64 v1, v0, 0, s[2:3]
3249; VI-NEXT:    v_cndmask_b32_e64 v0, v2, 0, s[2:3]
3250; VI-NEXT:    v_mov_b32_e32 v2, s4
3251; VI-NEXT:    v_mov_b32_e32 v3, s5
3252; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
3253; VI-NEXT:    s_endpgm
3254  %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
3255  %i2 = fneg <2 x float> %i
3256  %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2
3257  store <2 x float> %i3, ptr addrspace(1) %ptr, align 4
3258  ret void
3259}
3260
; Non-kernel <2 x float> variant: returns
; select(%arg1, zeroinitializer, fneg(select(%arg1, zeroinitializer, %arg))).
; Each element is expected to use one v_cndmask with a negated source
; (-v0 / -v1) against the sign-bit constant from v_bfrev_b32 of 1, followed
; by a v_cndmask against 0. Presumably a combiner infinite-loop regression
; test, going by the function name.
3261define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1) {
3262; GCN-LABEL: v_fneg_select_infloop_regression_v2f32:
3263; GCN:       ; %bb.0:
3264; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3265; GCN-NEXT:    v_and_b32_e32 v2, 1, v2
3266; GCN-NEXT:    v_bfrev_b32_e32 v3, 1
3267; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v2
3268; GCN-NEXT:    v_cndmask_b32_e64 v1, -v1, v3, vcc
3269; GCN-NEXT:    v_cndmask_b32_e64 v0, -v0, v3, vcc
3270; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3271; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, 0, vcc
3272; GCN-NEXT:    s_setpc_b64 s[30:31]
3273  %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg
3274  %i2 = fneg <2 x float> %i
3275  %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2
3276  ret <2 x float> %i3
3277}
3278
; Kernel variant of the same select / op / select pattern with llvm.fabs
; instead of fneg: stores
; select(%arg1, 0.0, fabs(select(%arg1, 0.0, %arg))) to %ptr. The expected
; output keeps two v_cndmask instructions and applies the fabs as a |v0|
; source modifier on the second one. Presumably a combiner infinite-loop
; regression test, going by the function name.
3279define amdgpu_kernel void @s_fabs_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3280; SI-LABEL: s_fabs_select_infloop_regression_f32:
3281; SI:       ; %bb.0:
3282; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
3283; SI-NEXT:    s_waitcnt lgkmcnt(0)
3284; SI-NEXT:    s_bitcmp1_b32 s1, 0
3285; SI-NEXT:    v_mov_b32_e32 v0, s0
3286; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3287; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3288; SI-NEXT:    v_cndmask_b32_e64 v2, |v0|, 0, s[0:1]
3289; SI-NEXT:    v_mov_b32_e32 v0, s2
3290; SI-NEXT:    v_mov_b32_e32 v1, s3
3291; SI-NEXT:    flat_store_dword v[0:1], v2
3292; SI-NEXT:    s_endpgm
3293;
3294; VI-LABEL: s_fabs_select_infloop_regression_f32:
3295; VI:       ; %bb.0:
3296; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
3297; VI-NEXT:    s_waitcnt lgkmcnt(0)
3298; VI-NEXT:    s_bitcmp1_b32 s1, 0
3299; VI-NEXT:    v_mov_b32_e32 v0, s0
3300; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3301; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3302; VI-NEXT:    v_cndmask_b32_e64 v2, |v0|, 0, s[0:1]
3303; VI-NEXT:    v_mov_b32_e32 v0, s2
3304; VI-NEXT:    v_mov_b32_e32 v1, s3
3305; VI-NEXT:    flat_store_dword v[0:1], v2
3306; VI-NEXT:    s_endpgm
3307  %i = select i1 %arg1, float 0.0, float %arg
3308  %i2 = call float @llvm.fabs.f32(float %i)
3309  %i3 = select i1 %arg1, float 0.0, float %i2
3310  store float %i3, ptr addrspace(1) %ptr, align 4
3311  ret void
3312}
3313
; Non-kernel fabs variant: returns
; select(%arg1, 0.0, fabs(select(%arg1, 0.0, %arg))). The expected output is
; two v_cndmask instructions with the fabs applied as a |v0| source modifier
; on the second one. Presumably a combiner infinite-loop regression test,
; going by the function name.
3314define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) {
3315; GCN-LABEL: v_fabs_select_infloop_regression_f32:
3316; GCN:       ; %bb.0:
3317; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3318; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
3319; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3320; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3321; GCN-NEXT:    v_cndmask_b32_e64 v0, |v0|, 0, vcc
3322; GCN-NEXT:    s_setpc_b64 s[30:31]
3323  %i = select i1 %arg1, float 0.0, float %arg
3324  %i2 = call float @llvm.fabs.f32(float %i)
3325  %i3 = select i1 %arg1, float 0.0, float %i2
3326  ret float %i3
3327}
3328
; Kernel variant combining both operations: stores
; select(%arg1, 0.0, fneg(fabs(select(%arg1, 0.0, %arg)))) to %ptr. The
; expected output keeps two v_cndmask instructions and applies fneg and fabs
; together as a -|v0| source modifier on the second one. Presumably a
; combiner infinite-loop regression test, going by the function name.
3329define amdgpu_kernel void @s_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
3330; SI-LABEL: s_fneg_fabs_select_infloop_regression:
3331; SI:       ; %bb.0:
3332; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
3333; SI-NEXT:    s_waitcnt lgkmcnt(0)
3334; SI-NEXT:    s_bitcmp1_b32 s1, 0
3335; SI-NEXT:    v_mov_b32_e32 v0, s0
3336; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3337; SI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3338; SI-NEXT:    v_cndmask_b32_e64 v2, -|v0|, 0, s[0:1]
3339; SI-NEXT:    v_mov_b32_e32 v0, s2
3340; SI-NEXT:    v_mov_b32_e32 v1, s3
3341; SI-NEXT:    flat_store_dword v[0:1], v2
3342; SI-NEXT:    s_endpgm
3343;
3344; VI-LABEL: s_fneg_fabs_select_infloop_regression:
3345; VI:       ; %bb.0:
3346; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
3347; VI-NEXT:    s_waitcnt lgkmcnt(0)
3348; VI-NEXT:    s_bitcmp1_b32 s1, 0
3349; VI-NEXT:    v_mov_b32_e32 v0, s0
3350; VI-NEXT:    s_cselect_b64 s[0:1], -1, 0
3351; VI-NEXT:    v_cndmask_b32_e64 v0, v0, 0, s[0:1]
3352; VI-NEXT:    v_cndmask_b32_e64 v2, -|v0|, 0, s[0:1]
3353; VI-NEXT:    v_mov_b32_e32 v0, s2
3354; VI-NEXT:    v_mov_b32_e32 v1, s3
3355; VI-NEXT:    flat_store_dword v[0:1], v2
3356; VI-NEXT:    s_endpgm
3357  %i = select i1 %arg1, float 0.0, float %arg
3358  %i2 = call float @llvm.fabs.f32(float %i)
3359  %neg.i2 = fneg float %i2
3360  %i3 = select i1 %arg1, float 0.0, float %neg.i2
3361  store float %i3, ptr addrspace(1) %ptr, align 4
3362  ret void
3363}
3364
; Non-kernel fneg(fabs) variant: returns
; select(%arg1, 0.0, fneg(fabs(select(%arg1, 0.0, %arg)))). The expected
; output is two v_cndmask instructions with fneg and fabs applied together as
; a -|v0| source modifier on the second one. Presumably a combiner
; infinite-loop regression test, going by the function name.
3365define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
3366; GCN-LABEL: v_fneg_fabs_select_infloop_regression:
3367; GCN:       ; %bb.0:
3368; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3369; GCN-NEXT:    v_and_b32_e32 v1, 1, v1
3370; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v1
3371; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, 0, vcc
3372; GCN-NEXT:    v_cndmask_b32_e64 v0, -|v0|, 0, vcc
3373; GCN-NEXT:    s_setpc_b64 s[30:31]
3374  %i = select i1 %arg1, float 0.0, float %arg
3375  %i2 = call float @llvm.fabs.f32(float %i)
3376  %neg.i2 = fneg float %i2
3377  %i3 = select i1 %arg1, float 0.0, float %neg.i2
3378  ret float %i3
3379}
3380
; Returns 0.0 - (%arg * 0.0), written with fsub from 0.0 rather than an fneg
; instruction. The two RUN configurations are expected to differ: the default
; (GCN-SAFE prefix) run keeps both the multiply by 0 and the subtract from 0,
; while the -enable-no-signed-zeros-fp-math (GCN-NSZ prefix) run folds them
; into a single multiply by 0x80000000, the bit pattern of -0.0. Presumably a
; combiner infinite-loop regression test, going by the function name.
3381define float @v_fmul_0_fsub_0_infloop_regression(float %arg) {
3382; GCN-SAFE-LABEL: v_fmul_0_fsub_0_infloop_regression:
3383; GCN-SAFE:       ; %bb.0: ; %bb
3384; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3385; GCN-SAFE-NEXT:    v_mul_f32_e32 v0, 0, v0
3386; GCN-SAFE-NEXT:    v_sub_f32_e32 v0, 0, v0
3387; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
3388;
3389; GCN-NSZ-LABEL: v_fmul_0_fsub_0_infloop_regression:
3390; GCN-NSZ:       ; %bb.0: ; %bb
3391; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3392; GCN-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
3393; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
3394bb:
3395  %i = fmul float %arg, 0.0
3396  %i1 = fsub float 0.0, %i
3397  ret float %i1
3398}
3399
; Declarations of the workitem-id intrinsic and of the generic llvm.*
; intrinsics on float, double and float vectors. All carry attribute group #1
; except llvm.fma.v2f32, which is declared without attributes. Their users
; are presumably earlier in the file; only llvm.fabs.f32 is called in the
; nearby tests.
3400declare i32 @llvm.amdgcn.workitem.id.x() #1
3401declare float @llvm.fma.f32(float, float, float) #1
3402declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
3403declare float @llvm.fmuladd.f32(float, float, float) #1
3404declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
3405declare float @llvm.fabs.f32(float) #1
3406declare float @llvm.sin.f32(float) #1
3407declare float @llvm.trunc.f32(float) #1
3408declare float @llvm.round.f32(float) #1
3409declare float @llvm.rint.f32(float) #1
3410declare float @llvm.nearbyint.f32(float) #1
3411declare float @llvm.roundeven.f32(float) #1
3412declare float @llvm.canonicalize.f32(float) #1
3413declare float @llvm.arithmetic.fence.f32(float) #1
3414declare float @llvm.minnum.f32(float, float) #1
3415declare float @llvm.maxnum.f32(float, float) #1
3416declare double @llvm.minnum.f64(double, double) #1
3417declare double @llvm.fma.f64(double, double, double) #1
3418
; Declarations of AMDGPU-specific (llvm.amdgcn.*) float intrinsics. The two
; interp intrinsics use attribute group #0 rather than #1, so they are
; nounwind but not readnone.
3419declare float @llvm.amdgcn.sin.f32(float) #1
3420declare float @llvm.amdgcn.rcp.f32(float) #1
3421declare float @llvm.amdgcn.rcp.legacy(float) #1
3422declare float @llvm.amdgcn.fmul.legacy(float, float) #1
3423declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
3424declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
3425
; Declarations of the half-precision counterparts of the intrinsics above,
; both generic and llvm.amdgcn.*. All carry attribute group #1 except
; llvm.fma.v2f16, which is declared without attributes.
3426declare half @llvm.fma.f16(half, half, half) #1
3427declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
3428declare half @llvm.fmuladd.f16(half, half, half) #1
3429declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
3430declare half @llvm.sin.f16(half) #1
3431declare half @llvm.trunc.f16(half) #1
3432declare half @llvm.round.f16(half) #1
3433declare half @llvm.rint.f16(half) #1
3434declare half @llvm.nearbyint.f16(half) #1
3435declare half @llvm.canonicalize.f16(half) #1
3436declare half @llvm.minnum.f16(half, half) #1
3437declare half @llvm.maxnum.f16(half, half) #1
3438declare half @llvm.amdgcn.sin.f16(half) #1
3439declare half @llvm.amdgcn.rcp.f16(half) #1
3440
; Attribute groups referenced as #N by the functions and declarations in this
; file.
; #0 - nounwind, with f32 denormal mode set to preserve-sign for both fields
;      of the attribute value.
; #1 - nounwind readnone; attached to most of the intrinsic declarations.
; #2 - nounwind, with the "unsafe-fp-math" function attribute enabled.
; #3 - nounwind, with the "no-signed-zeros-fp-math" function attribute
;      enabled.
; #4 - like #0, but additionally sets "amdgpu-ieee"="false".
; The functions that use #2, #3 and #4 are not in the nearby tests;
; presumably they are earlier in the file.
3441attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
3442attributes #1 = { nounwind readnone }
3443attributes #2 = { nounwind "unsafe-fp-math"="true" }
3444attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
3445attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
3446