xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll (revision ceb744eb2fa0895db1526110462745962fdf43c0)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-SAFE %s
3; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=SI,SI-NSZ %s
4
5; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-SAFE %s
6; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=VI,VI-NSZ %s
7
8; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-SAFE %s
9; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11,GFX11-NSZ %s
10
11; --------------------------------------------------------------------------------
12; fadd tests
13; --------------------------------------------------------------------------------
14
; fneg (fadd a, b), where the fadd has no other user.
; The *-SAFE expectations keep the add and flip the sign bit with a separate
; v_xor (mask 0x80000000 on the SI runs, which do the arithmetic in f32, and
; 0x8000 on the f16-capable VI/GFX11 runs). The *-NSZ expectations
; (-enable-no-signed-zeros-fp-math) instead contain a single subtract of b
; from a negated a, i.e. (-a) - b, with no xor.
15define half @v_fneg_add_f16(half %a, half %b) #0 {
16; SI-SAFE-LABEL: v_fneg_add_f16:
17; SI-SAFE:       ; %bb.0:
18; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
20; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
21; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
22; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
23; SI-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
24; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
25; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
26;
27; SI-NSZ-LABEL: v_fneg_add_f16:
28; SI-NSZ:       ; %bb.0:
29; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
31; SI-NSZ-NEXT:    v_cvt_f16_f32_e64 v0, -v0
32; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
33; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
34; SI-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
35; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
36;
37; VI-SAFE-LABEL: v_fneg_add_f16:
38; VI-SAFE:       ; %bb.0:
39; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; VI-SAFE-NEXT:    v_add_f16_e32 v0, v0, v1
41; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
42; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
43;
44; VI-NSZ-LABEL: v_fneg_add_f16:
45; VI-NSZ:       ; %bb.0:
46; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; VI-NSZ-NEXT:    v_sub_f16_e64 v0, -v0, v1
48; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX11-SAFE-LABEL: v_fneg_add_f16:
51; GFX11-SAFE:       ; %bb.0:
52; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX11-SAFE-NEXT:    v_add_f16_e32 v0, v0, v1
54; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
55; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
56; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
57;
58; GFX11-NSZ-LABEL: v_fneg_add_f16:
59; GFX11-NSZ:       ; %bb.0:
60; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61; GFX11-NSZ-NEXT:    v_sub_f16_e64 v0, -v0, v1
62; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
63  %add = fadd half %a, %b
64  %fneg = fneg half %add
65  ret half %fneg
66}
67
; fneg (fadd a, b), where the un-negated fadd result is also returned (second
; struct element). The expectations use the common per-target prefixes, so the
; expected code is the same with and without NSZ: the add is kept and the
; negated value is produced by a separate sign-bit v_xor.
68define { half, half } @v_fneg_add_store_use_add_f16(half %a, half %b) #0 {
69; SI-LABEL: v_fneg_add_store_use_add_f16:
70; SI:       ; %bb.0:
71; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
73; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
74; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
75; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
76; SI-NEXT:    v_add_f32_e32 v1, v0, v1
77; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
78; SI-NEXT:    s_setpc_b64 s[30:31]
79;
80; VI-LABEL: v_fneg_add_store_use_add_f16:
81; VI:       ; %bb.0:
82; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; VI-NEXT:    v_add_f16_e32 v1, v0, v1
84; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
85; VI-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX11-LABEL: v_fneg_add_store_use_add_f16:
88; GFX11:       ; %bb.0:
89; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX11-NEXT:    v_add_f16_e32 v1, v0, v1
91; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
92; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
93; GFX11-NEXT:    s_setpc_b64 s[30:31]
94  %add = fadd half %a, %b
95  %fneg = fneg half %add
96  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
97  %insert.1 = insertvalue { half, half } %insert.0, half %add, 1
98  ret { half, half } %insert.1
99}
100
; fneg (fadd a, b), where the fadd result has a second arithmetic user
; (fmul by 4.0). The *-SAFE expectations keep add + sign-bit xor and multiply
; the un-negated sum by 4.0. The *-NSZ expectations compute the negated sum
; directly as a subtract with a negated a, and the other user becomes a
; multiply of that negated value by -4.0.
101define { half, half } @v_fneg_add_multi_use_add_f16(half %a, half %b) #0 {
102; SI-SAFE-LABEL: v_fneg_add_multi_use_add_f16:
103; SI-SAFE:       ; %bb.0:
104; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
106; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
107; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
108; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
109; SI-SAFE-NEXT:    v_add_f32_e32 v1, v0, v1
110; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
111; SI-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
112; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
113;
114; SI-NSZ-LABEL: v_fneg_add_multi_use_add_f16:
115; SI-NSZ:       ; %bb.0:
116; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
118; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
119; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
120; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v0, -v0
121; SI-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
122; SI-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
123; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
124;
125; VI-SAFE-LABEL: v_fneg_add_multi_use_add_f16:
126; VI-SAFE:       ; %bb.0:
127; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128; VI-SAFE-NEXT:    v_add_f16_e32 v1, v0, v1
129; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
130; VI-SAFE-NEXT:    v_mul_f16_e32 v1, 4.0, v1
131; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
132;
133; VI-NSZ-LABEL: v_fneg_add_multi_use_add_f16:
134; VI-NSZ:       ; %bb.0:
135; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; VI-NSZ-NEXT:    v_sub_f16_e64 v0, -v0, v1
137; VI-NSZ-NEXT:    v_mul_f16_e32 v1, -4.0, v0
138; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
139;
140; GFX11-SAFE-LABEL: v_fneg_add_multi_use_add_f16:
141; GFX11-SAFE:       ; %bb.0:
142; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
143; GFX11-SAFE-NEXT:    v_add_f16_e32 v1, v0, v1
144; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
145; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
146; GFX11-SAFE-NEXT:    v_mul_f16_e32 v1, 4.0, v1
147; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
148;
149; GFX11-NSZ-LABEL: v_fneg_add_multi_use_add_f16:
150; GFX11-NSZ:       ; %bb.0:
151; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152; GFX11-NSZ-NEXT:    v_sub_f16_e64 v0, -v0, v1
153; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
154; GFX11-NSZ-NEXT:    v_mul_f16_e32 v1, -4.0, v0
155; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
156  %add = fadd half %a, %b
157  %fneg = fneg half %add
158  %use1 = fmul half %add, 4.0
159
160  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
161  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
162  ret { half, half } %insert.1
163}
164
; fneg (fadd (fneg a), b).
; The *-SAFE expectations compute b - a and then flip the sign bit with a
; v_xor. The *-NSZ expectations contain only a single subtract, a - b.
165define half @v_fneg_add_fneg_x_f16(half %a, half %b) #0 {
166; SI-SAFE-LABEL: v_fneg_add_fneg_x_f16:
167; SI-SAFE:       ; %bb.0:
168; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
170; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
171; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
172; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
173; SI-SAFE-NEXT:    v_sub_f32_e32 v0, v1, v0
174; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
175; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
176;
177; SI-NSZ-LABEL: v_fneg_add_fneg_x_f16:
178; SI-NSZ:       ; %bb.0:
179; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
181; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
182; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
183; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
184; SI-NSZ-NEXT:    v_sub_f32_e32 v0, v0, v1
185; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
186;
187; VI-SAFE-LABEL: v_fneg_add_fneg_x_f16:
188; VI-SAFE:       ; %bb.0:
189; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; VI-SAFE-NEXT:    v_sub_f16_e32 v0, v1, v0
191; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
192; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
193;
194; VI-NSZ-LABEL: v_fneg_add_fneg_x_f16:
195; VI-NSZ:       ; %bb.0:
196; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; VI-NSZ-NEXT:    v_sub_f16_e32 v0, v0, v1
198; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
199;
200; GFX11-SAFE-LABEL: v_fneg_add_fneg_x_f16:
201; GFX11-SAFE:       ; %bb.0:
202; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX11-SAFE-NEXT:    v_sub_f16_e32 v0, v1, v0
204; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
205; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
206; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
207;
208; GFX11-NSZ-LABEL: v_fneg_add_fneg_x_f16:
209; GFX11-NSZ:       ; %bb.0:
210; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GFX11-NSZ-NEXT:    v_sub_f16_e32 v0, v0, v1
212; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
213  %fneg.a = fneg half %a
214  %add = fadd half %fneg.a, %b
215  %fneg = fneg half %add
216  ret half %fneg
217}
218
; fneg (fadd a, (fneg b)).
; The *-SAFE expectations compute a - b and then flip the sign bit with a
; v_xor. The *-NSZ expectations contain only a single subtract, b - a.
219define half @v_fneg_add_x_fneg_f16(half %a, half %b) #0 {
220; SI-SAFE-LABEL: v_fneg_add_x_fneg_f16:
221; SI-SAFE:       ; %bb.0:
222; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
224; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
225; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
226; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
227; SI-SAFE-NEXT:    v_sub_f32_e32 v0, v0, v1
228; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
229; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
230;
231; SI-NSZ-LABEL: v_fneg_add_x_fneg_f16:
232; SI-NSZ:       ; %bb.0:
233; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
235; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
236; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
237; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
238; SI-NSZ-NEXT:    v_sub_f32_e32 v0, v1, v0
239; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
240;
241; VI-SAFE-LABEL: v_fneg_add_x_fneg_f16:
242; VI-SAFE:       ; %bb.0:
243; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244; VI-SAFE-NEXT:    v_sub_f16_e32 v0, v0, v1
245; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
246; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
247;
248; VI-NSZ-LABEL: v_fneg_add_x_fneg_f16:
249; VI-NSZ:       ; %bb.0:
250; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251; VI-NSZ-NEXT:    v_sub_f16_e32 v0, v1, v0
252; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
253;
254; GFX11-SAFE-LABEL: v_fneg_add_x_fneg_f16:
255; GFX11-SAFE:       ; %bb.0:
256; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; GFX11-SAFE-NEXT:    v_sub_f16_e32 v0, v0, v1
258; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
259; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
260; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
261;
262; GFX11-NSZ-LABEL: v_fneg_add_x_fneg_f16:
263; GFX11-NSZ:       ; %bb.0:
264; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265; GFX11-NSZ-NEXT:    v_sub_f16_e32 v0, v1, v0
266; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
267  %fneg.b = fneg half %b
268  %add = fadd half %a, %fneg.b
269  %fneg = fneg half %add
270  ret half %fneg
271}
272
; fneg (fadd (fneg a), (fneg b)).
; The *-SAFE expectations compute (-a) - b and then flip the sign bit with a
; v_xor. The *-NSZ expectations contain only a plain add, a + b.
273define half @v_fneg_add_fneg_fneg_f16(half %a, half %b) #0 {
274; SI-SAFE-LABEL: v_fneg_add_fneg_fneg_f16:
275; SI-SAFE:       ; %bb.0:
276; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
277; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
278; SI-SAFE-NEXT:    v_cvt_f16_f32_e64 v0, -v0
279; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
280; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
281; SI-SAFE-NEXT:    v_sub_f32_e32 v0, v0, v1
282; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
283; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
284;
285; SI-NSZ-LABEL: v_fneg_add_fneg_fneg_f16:
286; SI-NSZ:       ; %bb.0:
287; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
289; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
290; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
291; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
292; SI-NSZ-NEXT:    v_add_f32_e32 v0, v0, v1
293; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
294;
295; VI-SAFE-LABEL: v_fneg_add_fneg_fneg_f16:
296; VI-SAFE:       ; %bb.0:
297; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298; VI-SAFE-NEXT:    v_sub_f16_e64 v0, -v0, v1
299; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
300; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
301;
302; VI-NSZ-LABEL: v_fneg_add_fneg_fneg_f16:
303; VI-NSZ:       ; %bb.0:
304; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305; VI-NSZ-NEXT:    v_add_f16_e32 v0, v0, v1
306; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
307;
308; GFX11-SAFE-LABEL: v_fneg_add_fneg_fneg_f16:
309; GFX11-SAFE:       ; %bb.0:
310; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
311; GFX11-SAFE-NEXT:    v_sub_f16_e64 v0, -v0, v1
312; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
313; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
314; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
315;
316; GFX11-NSZ-LABEL: v_fneg_add_fneg_fneg_f16:
317; GFX11-NSZ:       ; %bb.0:
318; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; GFX11-NSZ-NEXT:    v_add_f16_e32 v0, v0, v1
320; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
321  %fneg.a = fneg half %a
322  %fneg.b = fneg half %b
323  %add = fadd half %fneg.a, %fneg.b
324  %fneg = fneg half %add
325  ret half %fneg
326}
327
; fneg (fadd (fneg a), b), where (fneg a) is also returned as the second
; struct element. In every run the negated a for the second result is
; materialized separately (a sign-bit v_xor on VI/GFX11, a negated-source
; conversion on the SI runs). For the first result the *-SAFE expectations
; compute b - a followed by a sign-bit v_xor, while the *-NSZ expectations
; compute a - b directly.
328define { half, half } @v_fneg_add_store_use_fneg_x_f16(half %a, half %b) #0 {
329; SI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16:
330; SI-SAFE:       ; %bb.0:
331; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
333; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
334; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v0
335; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v1
336; SI-SAFE-NEXT:    v_cvt_f32_f16_e64 v1, -v0
337; SI-SAFE-NEXT:    v_sub_f32_e32 v0, v3, v2
338; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
339; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
340;
341; SI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f16:
342; SI-NSZ:       ; %bb.0:
343; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
345; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
346; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v1
347; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v0
348; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v1, -v0
349; SI-NSZ-NEXT:    v_sub_f32_e32 v0, v3, v2
350; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
351;
352; VI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16:
353; VI-SAFE:       ; %bb.0:
354; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355; VI-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
356; VI-SAFE-NEXT:    v_sub_f16_e32 v0, v1, v0
357; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
358; VI-SAFE-NEXT:    v_mov_b32_e32 v1, v2
359; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
360;
361; VI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f16:
362; VI-NSZ:       ; %bb.0:
363; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; VI-NSZ-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
365; VI-NSZ-NEXT:    v_sub_f16_e32 v0, v0, v1
366; VI-NSZ-NEXT:    v_mov_b32_e32 v1, v2
367; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX11-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f16:
370; GFX11-SAFE:       ; %bb.0:
371; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GFX11-SAFE-NEXT:    v_sub_f16_e32 v1, v1, v0
373; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
374; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v1
375; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
376; GFX11-SAFE-NEXT:    v_mov_b32_e32 v0, v2
377; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
378;
379; GFX11-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f16:
380; GFX11-NSZ:       ; %bb.0:
381; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382; GFX11-NSZ-NEXT:    v_sub_f16_e32 v2, v0, v1
383; GFX11-NSZ-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
384; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
385; GFX11-NSZ-NEXT:    v_mov_b32_e32 v0, v2
386; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
387  %fneg.a = fneg half %a
388  %add = fadd half %fneg.a, %b
389  %fneg = fneg half %add
390  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
391  %insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
392  ret { half, half } %insert.1
393}
394
; fneg (fadd (fneg a), b), where (fneg a) has a second arithmetic user
; (fmul by c). In every run the multiply consumes -a through a negated source
; operand rather than a separate xor. For the first result the *-SAFE
; expectations compute b - a followed by a sign-bit v_xor, while the *-NSZ
; expectations compute a - b directly.
395define { half, half } @v_fneg_add_multi_use_fneg_x_f16(half %a, half %b, half %c) #0 {
396; SI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16:
397; SI-SAFE:       ; %bb.0:
398; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
400; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
401; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
402; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v0
403; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
404; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
405; SI-SAFE-NEXT:    v_cvt_f32_f16_e64 v4, -v0
406; SI-SAFE-NEXT:    v_sub_f32_e32 v0, v1, v3
407; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
408; SI-SAFE-NEXT:    v_mul_f32_e32 v1, v4, v2
409; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
410;
411; SI-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f16:
412; SI-NSZ:       ; %bb.0:
413; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
415; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
416; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
417; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
418; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
419; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v0
420; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v4, -v0
421; SI-NSZ-NEXT:    v_sub_f32_e32 v0, v3, v1
422; SI-NSZ-NEXT:    v_mul_f32_e32 v1, v4, v2
423; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
424;
425; VI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16:
426; VI-SAFE:       ; %bb.0:
427; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428; VI-SAFE-NEXT:    v_sub_f16_e32 v1, v1, v0
429; VI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x8000, v1
430; VI-SAFE-NEXT:    v_mul_f16_e64 v1, -v0, v2
431; VI-SAFE-NEXT:    v_mov_b32_e32 v0, v3
432; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
433;
434; VI-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f16:
435; VI-NSZ:       ; %bb.0:
436; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; VI-NSZ-NEXT:    v_sub_f16_e32 v3, v0, v1
438; VI-NSZ-NEXT:    v_mul_f16_e64 v1, -v0, v2
439; VI-NSZ-NEXT:    v_mov_b32_e32 v0, v3
440; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
441;
442; GFX11-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f16:
443; GFX11-SAFE:       ; %bb.0:
444; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445; GFX11-SAFE-NEXT:    v_sub_f16_e32 v1, v1, v0
446; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
447; GFX11-SAFE-NEXT:    v_xor_b32_e32 v3, 0x8000, v1
448; GFX11-SAFE-NEXT:    v_mul_f16_e64 v1, -v0, v2
449; GFX11-SAFE-NEXT:    v_mov_b32_e32 v0, v3
450; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
451;
452; GFX11-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f16:
453; GFX11-NSZ:       ; %bb.0:
454; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455; GFX11-NSZ-NEXT:    v_sub_f16_e32 v3, v0, v1
456; GFX11-NSZ-NEXT:    v_mul_f16_e64 v1, -v0, v2
457; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
458; GFX11-NSZ-NEXT:    v_mov_b32_e32 v0, v3
459; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
460  %fneg.a = fneg half %a
461  %add = fadd half %fneg.a, %b
462  %fneg = fneg half %add
463  %use1 = fmul half %fneg.a, %c
464
465  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
466  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
467  ret { half, half } %insert.1
468}
469
470; This one asserted with -enable-no-signed-zeros-fp-math
; Shader-calling-convention (amdgpu_ps) function with inreg scalar arguments;
; the <4 x i32> %arg parameter is not referenced in the body.
; The IR computes x = ((1.0 / %tmp6) * 0.0 * 0.0) + 0.0, selects %tmp2 when
; x >= %tmp2 (unordered compare) and (fneg x) otherwise, and finally returns
; 0.0 or a NaN constant depending on an unordered <= 0.0 compare.
; In the VI/GFX11 *-SAFE expectations the "+ 0.0" and the fneg remain as a
; v_add_f16 and a sign-bit v_xor. In the *-NSZ expectations both disappear:
; the product is formed with a 0x8000 (half -0.0) multiplier and the compare
; takes a negated source operand.
471define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #0 {
472; SI-SAFE-LABEL: fneg_fadd_0_f16:
473; SI-SAFE:       ; %bb.0: ; %.entry
474; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
475; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, s1
476; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, s0
477; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
478; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
479; SI-SAFE-NEXT:    v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
480; SI-SAFE-NEXT:    v_rcp_f32_e32 v3, v2
481; SI-SAFE-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
482; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
483; SI-SAFE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
484; SI-SAFE-NEXT:    v_fma_f32 v3, v5, v3, v3
485; SI-SAFE-NEXT:    v_mul_f32_e32 v5, v4, v3
486; SI-SAFE-NEXT:    v_fma_f32 v6, -v2, v5, v4
487; SI-SAFE-NEXT:    v_fma_f32 v5, v6, v3, v5
488; SI-SAFE-NEXT:    v_fma_f32 v2, -v2, v5, v4
489; SI-SAFE-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
490; SI-SAFE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
491; SI-SAFE-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
492; SI-SAFE-NEXT:    v_mad_f32 v0, v0, 0, 0
493; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v1
494; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, -v0, v1, vcc
495; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
496; SI-SAFE-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
497; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
498; SI-SAFE-NEXT:    ; return to shader part epilog
499;
500; SI-NSZ-LABEL: fneg_fadd_0_f16:
501; SI-NSZ:       ; %bb.0: ; %.entry
502; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
503; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, s1
504; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, s0
505; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
506; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
507; SI-NSZ-NEXT:    v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
508; SI-NSZ-NEXT:    v_rcp_f32_e32 v3, v2
509; SI-NSZ-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
510; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
511; SI-NSZ-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
512; SI-NSZ-NEXT:    v_fma_f32 v3, v5, v3, v3
513; SI-NSZ-NEXT:    v_mul_f32_e32 v5, v4, v3
514; SI-NSZ-NEXT:    v_fma_f32 v6, -v2, v5, v4
515; SI-NSZ-NEXT:    v_fma_f32 v5, v6, v3, v5
516; SI-NSZ-NEXT:    v_fma_f32 v2, -v2, v5, v4
517; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
518; SI-NSZ-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
519; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
520; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
521; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, v1
522; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
523; SI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
524; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
525; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
526; SI-NSZ-NEXT:    ; return to shader part epilog
527;
528; VI-SAFE-LABEL: fneg_fadd_0_f16:
529; VI-SAFE:       ; %bb.0: ; %.entry
530; VI-SAFE-NEXT:    v_rcp_f16_e32 v0, s1
531; VI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
532; VI-SAFE-NEXT:    v_mul_f16_e32 v0, 0, v0
533; VI-SAFE-NEXT:    v_add_f16_e32 v0, 0, v0
534; VI-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
535; VI-SAFE-NEXT:    v_cmp_ngt_f16_e32 vcc, s0, v0
536; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
537; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7e00
538; VI-SAFE-NEXT:    v_cmp_nlt_f16_e32 vcc, 0, v0
539; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
540; VI-SAFE-NEXT:    ; return to shader part epilog
541;
542; VI-NSZ-LABEL: fneg_fadd_0_f16:
543; VI-NSZ:       ; %bb.0: ; %.entry
544; VI-NSZ-NEXT:    v_rcp_f16_e32 v0, s1
545; VI-NSZ-NEXT:    v_mov_b32_e32 v1, s0
546; VI-NSZ-NEXT:    v_mul_f16_e32 v0, 0x8000, v0
547; VI-NSZ-NEXT:    v_cmp_nlt_f16_e64 vcc, -v0, s0
548; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
549; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7e00
550; VI-NSZ-NEXT:    v_cmp_nlt_f16_e32 vcc, 0, v0
551; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
552; VI-NSZ-NEXT:    ; return to shader part epilog
553;
554; GFX11-SAFE-LABEL: fneg_fadd_0_f16:
555; GFX11-SAFE:       ; %bb.0: ; %.entry
556; GFX11-SAFE-NEXT:    v_rcp_f16_e32 v0, s1
557; GFX11-SAFE-NEXT:    s_waitcnt_depctr 0xfff
558; GFX11-SAFE-NEXT:    v_mul_f16_e32 v0, 0, v0
559; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
560; GFX11-SAFE-NEXT:    v_add_f16_e32 v0, 0, v0
561; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
562; GFX11-SAFE-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, s0, v0
563; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
564; GFX11-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, s0, vcc_lo
565; GFX11-SAFE-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0
566; GFX11-SAFE-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
567; GFX11-SAFE-NEXT:    ; return to shader part epilog
568;
569; GFX11-NSZ-LABEL: fneg_fadd_0_f16:
570; GFX11-NSZ:       ; %bb.0: ; %.entry
571; GFX11-NSZ-NEXT:    v_rcp_f16_e32 v0, s1
572; GFX11-NSZ-NEXT:    s_waitcnt_depctr 0xfff
573; GFX11-NSZ-NEXT:    v_mul_f16_e32 v0, 0x8000, v0
574; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
575; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e64 s1, -v0, s0
576; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v0, v0, s0, s1
577; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
578; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0
579; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
580; GFX11-NSZ-NEXT:    ; return to shader part epilog
581.entry:
582  %tmp7 = fdiv half 1.000000e+00, %tmp6
583  %tmp8 = fmul half 0.000000e+00, %tmp7
584  %tmp9 = fmul reassoc nnan arcp contract half 0.000000e+00, %tmp8
585  %.i188 = fadd half %tmp9, 0.000000e+00
586  %tmp10 = fcmp uge half %.i188, %tmp2
587  %tmp11 = fneg half %.i188
588  %.i092 = select i1 %tmp10, half %tmp2, half %tmp11
589  %tmp12 = fcmp ule half %.i092, 0.000000e+00
590  %.i198 = select i1 %tmp12, half 0.000000e+00, half 0x7FF8000000000000
591  ret half %.i198
592}
593
594; This is a workaround because -enable-no-signed-zeros-fp-math does not set the
595; unsafe-fp-math function attribute automatically. Combine with the previous test
596; when that is done.
; Same computation as fneg_fadd_0_f16, with two visible differences in the IR:
; the fdiv carries the 'afn' fast-math flag, and the function uses attribute
; group #2 instead of #0.
; NOTE(review): the definition of attribute group #2 is outside this part of
; the file; confirm what it enables before relying on the expectations below.
; In the *-SAFE expectations the reciprocal disappears entirely and the result
; is computed from s0 (%tmp2) alone; the *-NSZ expectations still contain a
; v_rcp followed by a multiply with a negative-zero constant.
597define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <4 x i32> %arg) #2 {
598; SI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
599; SI-SAFE:       ; %bb.0: ; %.entry
600; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, s0
601; SI-SAFE-NEXT:    s_brev_b32 s0, 1
602; SI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
603; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
604; SI-SAFE-NEXT:    v_min_legacy_f32_e32 v0, 0, v0
605; SI-SAFE-NEXT:    v_cmp_ngt_f32_e32 vcc, s0, v0
606; SI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
607; SI-SAFE-NEXT:    ; return to shader part epilog
608;
609; SI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
610; SI-NSZ:       ; %bb.0: ; %.entry
611; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, s1
612; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, s0
613; SI-NSZ-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
614; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
615; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
616; SI-NSZ-NEXT:    v_rcp_f32_e32 v0, v0
617; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
618; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, v1
619; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
620; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
621; SI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
622; SI-NSZ-NEXT:    ; return to shader part epilog
623;
624; VI-SAFE-LABEL: fneg_fadd_0_nsz_f16:
625; VI-SAFE:       ; %bb.0: ; %.entry
626; VI-SAFE-NEXT:    v_mov_b32_e32 v0, 0x8000
627; VI-SAFE-NEXT:    v_mov_b32_e32 v1, s0
628; VI-SAFE-NEXT:    v_cmp_ngt_f16_e64 vcc, s0, 0
629; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
630; VI-SAFE-NEXT:    v_mov_b32_e32 v1, 0x7e00
631; VI-SAFE-NEXT:    v_cmp_nlt_f16_e32 vcc, 0, v0
632; VI-SAFE-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
633; VI-SAFE-NEXT:    ; return to shader part epilog
634;
635; VI-NSZ-LABEL: fneg_fadd_0_nsz_f16:
636; VI-NSZ:       ; %bb.0: ; %.entry
637; VI-NSZ-NEXT:    v_rcp_f16_e32 v0, s1
638; VI-NSZ-NEXT:    v_mov_b32_e32 v1, s0
639; VI-NSZ-NEXT:    v_mul_f16_e32 v0, 0x8000, v0
640; VI-NSZ-NEXT:    v_cmp_nlt_f16_e64 vcc, -v0, s0
641; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
642; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7e00
643; VI-NSZ-NEXT:    v_cmp_nlt_f16_e32 vcc, 0, v0
644; VI-NSZ-NEXT:    v_cndmask_b32_e64 v0, v1, 0, vcc
645; VI-NSZ-NEXT:    ; return to shader part epilog
646;
647; GFX11-SAFE-LABEL: fneg_fadd_0_nsz_f16:
648; GFX11-SAFE:       ; %bb.0: ; %.entry
649; GFX11-SAFE-NEXT:    v_mov_b32_e32 v0, s0
650; GFX11-SAFE-NEXT:    v_cmp_ngt_f16_e64 vcc_lo, s0, 0
651; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
652; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x8000, v0, vcc_lo
653; GFX11-SAFE-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0
654; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_3)
655; GFX11-SAFE-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
656; GFX11-SAFE-NEXT:    ; return to shader part epilog
657;
658; GFX11-NSZ-LABEL: fneg_fadd_0_nsz_f16:
659; GFX11-NSZ:       ; %bb.0: ; %.entry
660; GFX11-NSZ-NEXT:    v_rcp_f16_e32 v0, s1
661; GFX11-NSZ-NEXT:    s_waitcnt_depctr 0xfff
662; GFX11-NSZ-NEXT:    v_mul_f16_e32 v0, 0x8000, v0
663; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
664; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e64 s1, -v0, s0
665; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v0, v0, s0, s1
666; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
667; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0
668; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
669; GFX11-NSZ-NEXT:    ; return to shader part epilog
670.entry:
671  %tmp7 = fdiv afn half 1.000000e+00, %tmp6
672  %tmp8 = fmul half 0.000000e+00, %tmp7
673  %tmp9 = fmul reassoc nnan arcp contract half 0.000000e+00, %tmp8
674  %.i188 = fadd half %tmp9, 0.000000e+00
675  %tmp10 = fcmp uge half %.i188, %tmp2
676  %tmp11 = fneg half %.i188
677  %.i092 = select i1 %tmp10, half %tmp2, half %tmp11
678  %tmp12 = fcmp ule half %.i092, 0.000000e+00
679  %.i198 = select i1 %tmp12, half 0.000000e+00, half 0x7FF8000000000000
680  ret half %.i198
681}
682
683; --------------------------------------------------------------------------------
684; fmul tests
685; --------------------------------------------------------------------------------
686
; fneg (fmul a, b), where the fmul has no other user.
; The expectations use the common per-target prefixes, so the expected code is
; the same with and without NSZ: the negation is folded into the b operand
; (a * -b) and no separate sign-bit xor is emitted.
687define half @v_fneg_mul_f16(half %a, half %b) #0 {
688; SI-LABEL: v_fneg_mul_f16:
689; SI:       ; %bb.0:
690; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
692; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
693; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
694; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
695; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
696; SI-NEXT:    s_setpc_b64 s[30:31]
697;
698; VI-LABEL: v_fneg_mul_f16:
699; VI:       ; %bb.0:
700; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701; VI-NEXT:    v_mul_f16_e64 v0, v0, -v1
702; VI-NEXT:    s_setpc_b64 s[30:31]
703;
704; GFX11-LABEL: v_fneg_mul_f16:
705; GFX11:       ; %bb.0:
706; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v1
708; GFX11-NEXT:    s_setpc_b64 s[30:31]
709  %mul = fmul half %a, %b
710  %fneg = fneg half %mul
711  ret half %fneg
712}
713
; fneg (fmul a, b), where the un-negated fmul result is also returned (second
; struct element). In every run the multiply is kept as-is and the negated
; value is produced by a separate sign-bit v_xor.
714define { half, half } @v_fneg_mul_store_use_mul_f16(half %a, half %b) #0 {
715; SI-LABEL: v_fneg_mul_store_use_mul_f16:
716; SI:       ; %bb.0:
717; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
719; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
720; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
721; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
722; SI-NEXT:    v_mul_f32_e32 v1, v0, v1
723; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
724; SI-NEXT:    s_setpc_b64 s[30:31]
725;
726; VI-LABEL: v_fneg_mul_store_use_mul_f16:
727; VI:       ; %bb.0:
728; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
729; VI-NEXT:    v_mul_f16_e32 v1, v0, v1
730; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
731; VI-NEXT:    s_setpc_b64 s[30:31]
732;
733; GFX11-LABEL: v_fneg_mul_store_use_mul_f16:
734; GFX11:       ; %bb.0:
735; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736; GFX11-NEXT:    v_mul_f16_e32 v1, v0, v1
737; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
738; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
739; GFX11-NEXT:    s_setpc_b64 s[30:31]
740  %mul = fmul half %a, %b
741  %fneg = fneg half %mul
742  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
743  %insert.1 = insertvalue { half, half } %insert.0, half %mul, 1
744  ret { half, half } %insert.1
745}
746
; The product feeds both the fneg and a second multiply by 4.0. The assertions
; expect the first multiply to take a negated operand and the second multiply
; to use -4.0 instead.
define { half, half } @v_fneg_mul_multi_use_mul_f16(half %a, half %b) #0 {
; SI-LABEL: v_fneg_mul_multi_use_mul_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
; SI-NEXT:    v_mul_f32_e32 v1, -4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_mul_multi_use_mul_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e64 v0, v0, -v1
; VI-NEXT:    v_mul_f16_e32 v1, -4.0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_mul_multi_use_mul_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f16_e32 v1, -4.0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %prod = fmul half %a, %b
  %neg.prod = fneg half %prod
  %scaled = fmul half %prod, 4.0
  %ret.0 = insertvalue { half, half } poison, half %neg.prod, 0
  %ret.1 = insertvalue { half, half } %ret.0, half %scaled, 1
  ret { half, half } %ret.1
}
780
; fneg(fmul(fneg %a, %b)): the two negations cancel, and the assertions expect
; a plain multiply with no source modifiers.
define half @v_fneg_mul_fneg_x_f16(half %a, half %b) #0 {
; SI-LABEL: v_fneg_mul_fneg_x_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_mul_fneg_x_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_mul_fneg_x_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg half %a
  %prod = fmul half %neg.a, %b
  %neg.prod = fneg half %prod
  ret half %neg.prod
}
808
; fneg(fmul(%a, fneg %b)): same as the fneg-on-lhs case but with the inner
; negation on the right operand. The assertions expect a plain multiply.
define half @v_fneg_mul_x_fneg_f16(half %a, half %b) #0 {
; SI-LABEL: v_fneg_mul_x_fneg_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_mul_x_fneg_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_mul_x_fneg_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg half %b
  %prod = fmul half %a, %neg.b
  %neg.prod = fneg half %prod
  ret half %neg.prod
}
836
; fneg(fmul(fneg %a, fneg %b)): three negations in total, so one survives.
; The assertions expect it as a single negated source operand.
define half @v_fneg_mul_fneg_fneg_f16(half %a, half %b) #0 {
; SI-LABEL: v_fneg_mul_fneg_fneg_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_mul_fneg_fneg_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e64 v0, v0, -v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_mul_fneg_fneg_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg half %a
  %neg.b = fneg half %b
  %prod = fmul half %neg.a, %neg.b
  %neg.prod = fneg half %prod
  ret half %neg.prod
}
865
; The negated input is returned as well as being multiplied. The assertions
; expect a plain multiply of the original operands plus a separately
; materialized negation of %a for the second result.
define { half, half } @v_fneg_mul_store_use_fneg_x_f16(half %a, half %b) #0 {
; SI-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v0
; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v0
; SI-NEXT:    v_mul_f32_e32 v0, v3, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v2, 0x8000, v0
; VI-NEXT:    v_mul_f16_e32 v0, v0, v1
; VI-NEXT:    v_mov_b32_e32 v1, v2
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_mul_store_use_fneg_x_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg half %a
  %prod = fmul half %neg.a, %b
  %neg.prod = fneg half %prod
  %ret.0 = insertvalue { half, half } poison, half %neg.prod, 0
  %ret.1 = insertvalue { half, half } %ret.0, half %neg.a, 1
  ret { half, half } %ret.1
}
901
; The negated input feeds two multiplies, one of which is negated again. The
; assertions expect a plain multiply for the double-negated product and a
; negated source operand on the other multiply.
define { half, half } @v_fneg_mul_multi_use_fneg_x_f16(half %a, half %b, half %c) #0 {
; SI-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v0
; SI-NEXT:    v_cvt_f32_f16_e64 v4, -v0
; SI-NEXT:    v_mul_f32_e32 v0, v3, v1
; SI-NEXT:    v_mul_f32_e32 v1, v4, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v3, v0, v1
; VI-NEXT:    v_mul_f16_e64 v1, -v0, v2
; VI-NEXT:    v_mov_b32_e32 v0, v3
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_mul_multi_use_fneg_x_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v3, v0, v1
; GFX11-NEXT:    v_mul_f16_e64 v1, -v0, v2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg half %a
  %prod = fmul half %neg.a, %b
  %neg.prod = fneg half %prod
  %other.prod = fmul half %neg.a, %c
  %ret.0 = insertvalue { half, half } poison, half %neg.prod, 0
  %ret.1 = insertvalue { half, half } %ret.0, half %other.prod, 1
  ret { half, half } %ret.1
}
941
; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------
945
; fneg(minnum(%a, %b)) under attribute group #0. The assertions expect a max
; of the negated operands; on VI and GFX11 each operand first goes through a
; self-max with negated sources.
define half @v_fneg_minnum_f16_ieee(half %a, half %b) #0 {
; SI-LABEL: v_fneg_minnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_minnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v1, -v1, -v1
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_max_f16_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_minnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half %a, half %b)
  %neg.min = fneg half %minval
  ret half %neg.min
}
977
; fneg(minnum(%a, %b)) under attribute group #4 (defined elsewhere in the file;
; presumably the non-IEEE-mode variant, going by the function name). The
; assertions expect a single max with both sources negated on VI and GFX11.
define half @v_fneg_minnum_f16_no_ieee(half %a, half %b) #4 {
; SI-LABEL: v_fneg_minnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_minnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_minnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half %a, half %b)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1004
; fneg(minnum(%a, %a)): both minnum operands are the same value. The assertions
; expect only a sign-bit xor of the input, with no min or max instruction.
define half @v_fneg_self_minnum_f16_ieee(half %a) #0 {
; SI-LABEL: v_fneg_self_minnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_self_minnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_self_minnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half %a, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1027
; Attribute-group-#4 variant of the self-minnum case. The assertions again
; expect only a sign-bit xor of the input.
define half @v_fneg_self_minnum_f16_no_ieee(half %a) #4 {
; SI-LABEL: v_fneg_self_minnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_self_minnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_self_minnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half %a, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1050
; fneg(minnum(4.0, %a)): the assertions expect a max of -4.0 and the negated
; input.
define half @v_fneg_posk_minnum_f16_ieee(half %a) #0 {
; SI-LABEL: v_fneg_posk_minnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, -4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_posk_minnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_max_f16_e32 v0, -4.0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_posk_minnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, -4.0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half 4.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1078
; Attribute-group-#4 variant of fneg(minnum(4.0, %a)). On VI and GFX11 the
; assertions expect one max taking the negated input and -4.0 directly.
define half @v_fneg_posk_minnum_f16_no_ieee(half %a) #4 {
; SI-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, -4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -4.0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_posk_minnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -4.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half 4.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1103
; fneg(minnum(-4.0, %a)): the assertions expect a max of +4.0 and the negated
; input.
define half @v_fneg_negk_minnum_f16_ieee(half %a) #0 {
; SI-LABEL: v_fneg_negk_minnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, 4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_negk_minnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_max_f16_e32 v0, 4.0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_negk_minnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, 4.0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half -4.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1131
; Attribute-group-#4 variant of fneg(minnum(-4.0, %a)). On VI and GFX11 the
; assertions expect one max taking the negated input and +4.0 directly.
define half @v_fneg_negk_minnum_f16_no_ieee(half %a) #4 {
; SI-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, 4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, 4.0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_negk_minnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, 4.0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half -4.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1156
; fneg(nnan minnum(0.0, %a)): the assertions expect the min against 0 to be
; kept, followed by an explicit sign-bit xor, so the negation is not folded
; into the min/max here.
define half @v_fneg_0_minnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_0_minnum_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_min_f32_e32 v0, 0, v0
; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_0_minnum_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_min_f16_e32 v0, 0, v0
; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_0_minnum_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_f16_e32 v0, 0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call nnan half @llvm.minnum.f16(half 0.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1185
; fneg(minnum(-0.0, %a)): the assertions expect a max of 0 and the negated
; input.
define half @v_fneg_neg0_minnum_f16_ieee(half %a) #0 {
; SI-LABEL: v_fneg_neg0_minnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, 0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_neg0_minnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_max_f16_e32 v0, 0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_neg0_minnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, 0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half -0.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1213
; fneg(minnum(0xH3118, %a)); the VI and GFX11 assertions print this constant as
; 0.15915494. On SI the assertions expect a max against the negated constant.
; On VI and GFX11 they expect the min to be kept and followed by a sign-bit xor.
define half @v_fneg_inv2pi_minnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_inv2pi_minnum_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, 0xbe230000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_inv2pi_minnum_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e32 v0, v0, v0
; VI-NEXT:    v_min_f16_e32 v0, 0.15915494, v0
; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_inv2pi_minnum_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_min_f16_e32 v0, 0.15915494, v0
; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half 0xH3118, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1243
; fneg(minnum(0xHB118, %a)). 0xHB118 is 0xH3118 with the sign bit set, i.e. the
; negation of the constant used by v_fneg_inv2pi_minnum_f16, so this covers the
; negative-constant case that the function name describes. Previously the IR
; used 0xH3118 and merely duplicated the positive-constant test.
; NOTE(review): the assertions below were updated by hand to match the
; constant change (a max of the positive constant and the negated input, as in
; the other negative-constant minnum tests); regenerate them with
; utils/update_llc_test_checks.py to confirm.
define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 {
; SI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, 0x3e230000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_max_f16_e32 v0, 0.15915494, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_neg_inv2pi_minnum_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, 0.15915494, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %min = call half @llvm.minnum.f16(half 0xHB118, half %a)
  %fneg = fneg half %min
  ret half %fneg
}
1273
; Attribute-group-#4 variant of fneg(minnum(-0.0, %a)). On VI and GFX11 the
; assertions expect one max taking the negated input and 0 directly.
define half @v_fneg_neg0_minnum_f16_no_ieee(half %a) #4 {
; SI-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_max_f32_e32 v0, 0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, 0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_neg0_minnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half -0.0, half %a)
  %neg.min = fneg half %minval
  ret half %neg.min
}
1298
; fneg(minnum(0.0, %a)) whose only user is an fmul. The assertions expect the
; min against 0 to be kept and the negation to appear as a source modifier on
; the multiply.
define half @v_fneg_0_minnum_foldable_use_f16_ieee(half %a, half %b) #0 {
; SI-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_min_f32_e32 v0, 0, v0
; SI-NEXT:    v_mul_f32_e64 v0, -v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e32 v0, v0, v0
; VI-NEXT:    v_min_f16_e32 v0, 0, v0
; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_min_f16_e32 v0, 0, v0
; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half 0.0, half %a)
  %neg.min = fneg half %minval
  %prod = fmul half %neg.min, %b
  ret half %prod
}
1332
; fneg(minnum(0xH3118, %a)) whose only user is an fmul. On SI the assertions
; expect a max against the negated constant feeding a plain multiply. On VI
; and GFX11 they expect the min to be kept and the negation to appear as a
; source modifier on the multiply.
define half @v_fneg_inv2pi_minnum_foldable_use_f16(half %a, half %b) #0 {
; SI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_max_f32_e32 v0, 0xbe230000, v0
; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e32 v0, v0, v0
; VI-NEXT:    v_min_f16_e32 v0, 0.15915494, v0
; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_inv2pi_minnum_foldable_use_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_min_f16_e32 v0, 0.15915494, v0
; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half 0xH3118, half %a)
  %neg.min = fneg half %minval
  %prod = fmul half %neg.min, %b
  ret half %prod
}
1366
; Attribute-group-#4 variant of the min-against-0 case with an fmul user. The
; assertions expect the min to be kept and the negation to appear as a source
; modifier on the multiply.
define half @v_fneg_0_minnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
; SI-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_min_f32_e32 v0, 0, v0
; SI-NEXT:    v_mul_f32_e64 v0, -v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_min_f16_e32 v0, 0, v0
; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_0_minnum_foldable_use_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_f16_e32 v0, 0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half 0.0, half %a)
  %neg.min = fneg half %minval
  %prod = fmul half %neg.min, %b
  ret half %prod
}
1398
; The minnum result feeds both the fneg and a multiply by 4.0. The assertions
; expect a max of the negated operands, with the second user using -4.0
; instead.
define { half, half } @v_fneg_minnum_multi_use_minnum_f16_ieee(half %a, half %b) #0 {
; SI-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; SI-NEXT:    v_max_f32_e32 v0, v0, v1
; SI-NEXT:    v_mul_f32_e32 v1, -4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v1, -v1, -v1
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_max_f16_e32 v0, v0, v1
; VI-NEXT:    v_mul_f16_e32 v1, -4.0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
; GFX11-NEXT:    v_mul_f16_e32 v1, -4.0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %minval = call half @llvm.minnum.f16(half %a, half %b)
  %neg.min = fneg half %minval
  %scaled = fmul half %minval, 4.0
  %ret.0 = insertvalue { half, half } poison, half %neg.min, 0
  %ret.1 = insertvalue { half, half } %ret.0, half %scaled, 1
  ret { half, half } %ret.1
}
1436
; Attribute-group-#4 variant of the multi-use minnum case, returning both
; results packed into a <2 x half>. On SI and VI the assertions expect a max of
; the negated operands with the second user multiplying by -4.0 (materialized
; as 0xc400 on VI). On GFX11 they expect the min to be kept, a multiply by
; +4.0, and the negation applied as a source modifier on the pack.
; The vector is built from poison rather than the deprecated undef; both lanes
; are overwritten, so the placeholder value is never observed.
define <2 x half> @v_fneg_minnum_multi_use_minnum_f16_no_ieee(half %a, half %b) #4 {
; SI-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; SI-NEXT:    v_max_f32_e32 v0, v0, v1
; SI-NEXT:    v_mul_f32_e32 v1, -4.0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v1
; VI-NEXT:    v_mov_b32_e32 v1, 0xc400
; VI-NEXT:    v_mul_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_minnum_multi_use_minnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_mul_f16_e32 v1, 4.0, v0
; GFX11-NEXT:    v_pack_b32_f16 v0, -v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %min = call half @llvm.minnum.f16(half %a, half %b)
  %fneg = fneg half %min
  %use1 = fmul half %min, 4.0
  %ins0 = insertelement <2 x half> poison, half %fneg, i32 0
  %ins1 = insertelement <2 x half> %ins0, half %use1, i32 1
  ret <2 x half> %ins1
}
1473
; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------
1477
; fneg(maxnum(%a, %b)) under attribute group #0. The assertions expect a min of
; the negated operands; on VI and GFX11 each operand first goes through a
; self-max with negated sources.
define half @v_fneg_maxnum_f16_ieee(half %a, half %b) #0 {
; SI-LABEL: v_fneg_maxnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_min_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_maxnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_max_f16_e64 v1, -v1, -v1
; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
; VI-NEXT:    v_min_f16_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_maxnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_max_f16_e64 v1, -v1, -v1
; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %maxval = call half @llvm.maxnum.f16(half %a, half %b)
  %neg.max = fneg half %maxval
  ret half %neg.max
}
1509
; fneg(maxnum(%a, %b)) under attribute group #4. On VI and GFX11 the assertions
; expect a single min with both sources negated.
define half @v_fneg_maxnum_f16_no_ieee(half %a, half %b) #4 {
; SI-LABEL: v_fneg_maxnum_f16_no_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_min_f32_e32 v0, v0, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_maxnum_f16_no_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_min_f16_e64 v0, -v0, -v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_maxnum_f16_no_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_min_f16_e64 v0, -v0, -v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %maxval = call half @llvm.maxnum.f16(half %a, half %b)
  %neg.max = fneg half %maxval
  ret half %neg.max
}
1536
; fneg(maxnum(%a, %a)): both maxnum operands are the same value. The assertions
; expect only a sign-bit xor of the input, with no min or max instruction.
define half @v_fneg_self_maxnum_f16_ieee(half %a) #0 {
; SI-LABEL: v_fneg_self_maxnum_f16_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_self_maxnum_f16_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fneg_self_maxnum_f16_ieee:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %maxval = call half @llvm.maxnum.f16(half %a, half %a)
  %neg.max = fneg half %maxval
  ret half %neg.max
}
1559
; fneg(maxnum(a, a)) under attribute group #4 (defined outside this chunk; the
; _no_ieee suffix suggests IEEE mode is disabled there -- confirm).
; The expected output is the same as for the #0 variant of this test. Only a
; sign-bit xor of the input remains, with no max/min instruction.
1560define half @v_fneg_self_maxnum_f16_no_ieee(half %a) #4 {
1561; SI-LABEL: v_fneg_self_maxnum_f16_no_ieee:
1562; SI:       ; %bb.0:
1563; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1564; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1565; SI-NEXT:    s_setpc_b64 s[30:31]
1566;
1567; VI-LABEL: v_fneg_self_maxnum_f16_no_ieee:
1568; VI:       ; %bb.0:
1569; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1570; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1571; VI-NEXT:    s_setpc_b64 s[30:31]
1572;
1573; GFX11-LABEL: v_fneg_self_maxnum_f16_no_ieee:
1574; GFX11:       ; %bb.0:
1575; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1576; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1577; GFX11-NEXT:    s_setpc_b64 s[30:31]
1578  %max = call half @llvm.maxnum.f16(half %a, half %a)
1579  %max.fneg = fneg half %max
1580  ret half %max.fneg
1581}
1582
; fneg(maxnum(4.0, a)) under attribute group #0 (defined outside this chunk).
; The checks expect the negation to be folded away. The result is a min of
; the negated constant (-4.0) and the negated input. On VI and GFX11 the
; input is first run through v_max_f16 with negated sources (presumably the
; canonicalization needed in IEEE mode -- confirm) before the min.
1583define half @v_fneg_posk_maxnum_f16_ieee(half %a) #0 {
1584; SI-LABEL: v_fneg_posk_maxnum_f16_ieee:
1585; SI:       ; %bb.0:
1586; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1587; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1588; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1589; SI-NEXT:    v_min_f32_e32 v0, -4.0, v0
1590; SI-NEXT:    s_setpc_b64 s[30:31]
1591;
1592; VI-LABEL: v_fneg_posk_maxnum_f16_ieee:
1593; VI:       ; %bb.0:
1594; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1595; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
1596; VI-NEXT:    v_min_f16_e32 v0, -4.0, v0
1597; VI-NEXT:    s_setpc_b64 s[30:31]
1598;
1599; GFX11-LABEL: v_fneg_posk_maxnum_f16_ieee:
1600; GFX11:       ; %bb.0:
1601; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1602; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
1603; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1604; GFX11-NEXT:    v_min_f16_e32 v0, -4.0, v0
1605; GFX11-NEXT:    s_setpc_b64 s[30:31]
1606  %max = call half @llvm.maxnum.f16(half 4.0, half %a)
1607  %fneg = fneg half %max
1608  ret half %fneg
1609}
1610
; fneg(maxnum(4.0, a)) under attribute group #4 (defined outside this chunk; the
; _no_ieee suffix suggests IEEE mode is disabled there -- confirm).
; The checks expect a single min of the negated input and -4.0. On VI and
; GFX11 this is one v_min_f16 with a source modifier and no separate
; v_max_f16 beforehand.
1611define half @v_fneg_posk_maxnum_f16_no_ieee(half %a) #4 {
1612; SI-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
1613; SI:       ; %bb.0:
1614; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1615; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1616; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1617; SI-NEXT:    v_min_f32_e32 v0, -4.0, v0
1618; SI-NEXT:    s_setpc_b64 s[30:31]
1619;
1620; VI-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
1621; VI:       ; %bb.0:
1622; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1623; VI-NEXT:    v_min_f16_e64 v0, -v0, -4.0
1624; VI-NEXT:    s_setpc_b64 s[30:31]
1625;
1626; GFX11-LABEL: v_fneg_posk_maxnum_f16_no_ieee:
1627; GFX11:       ; %bb.0:
1628; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1629; GFX11-NEXT:    v_min_f16_e64 v0, -v0, -4.0
1630; GFX11-NEXT:    s_setpc_b64 s[30:31]
1631  %max = call half @llvm.maxnum.f16(half 4.0, half %a)
1632  %fneg = fneg half %max
1633  ret half %fneg
1634}
1635
; fneg(maxnum(-4.0, a)) under attribute group #0 (defined outside this chunk).
; This mirrors the positive-constant test. The checks expect a min of the
; negated input and +4.0. VI and GFX11 keep a v_max_f16 with negated sources
; ahead of the min (presumably IEEE-mode canonicalization -- confirm).
1636define half @v_fneg_negk_maxnum_f16_ieee(half %a) #0 {
1637; SI-LABEL: v_fneg_negk_maxnum_f16_ieee:
1638; SI:       ; %bb.0:
1639; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1640; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1641; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1642; SI-NEXT:    v_min_f32_e32 v0, 4.0, v0
1643; SI-NEXT:    s_setpc_b64 s[30:31]
1644;
1645; VI-LABEL: v_fneg_negk_maxnum_f16_ieee:
1646; VI:       ; %bb.0:
1647; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1648; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
1649; VI-NEXT:    v_min_f16_e32 v0, 4.0, v0
1650; VI-NEXT:    s_setpc_b64 s[30:31]
1651;
1652; GFX11-LABEL: v_fneg_negk_maxnum_f16_ieee:
1653; GFX11:       ; %bb.0:
1654; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1655; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
1656; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1657; GFX11-NEXT:    v_min_f16_e32 v0, 4.0, v0
1658; GFX11-NEXT:    s_setpc_b64 s[30:31]
1659  %max = call half @llvm.maxnum.f16(half -4.0, half %a)
1660  %fneg = fneg half %max
1661  ret half %fneg
1662}
1663
; fneg(maxnum(-4.0, a)) under attribute group #4 (defined outside this chunk;
; the _no_ieee suffix suggests IEEE mode is disabled there -- confirm).
; The checks expect a single min of the negated input and +4.0. VI and GFX11
; need only one v_min_f16 with a source modifier.
1664define half @v_fneg_negk_maxnum_f16_no_ieee(half %a) #4 {
1665; SI-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
1666; SI:       ; %bb.0:
1667; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1669; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1670; SI-NEXT:    v_min_f32_e32 v0, 4.0, v0
1671; SI-NEXT:    s_setpc_b64 s[30:31]
1672;
1673; VI-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
1674; VI:       ; %bb.0:
1675; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1676; VI-NEXT:    v_min_f16_e64 v0, -v0, 4.0
1677; VI-NEXT:    s_setpc_b64 s[30:31]
1678;
1679; GFX11-LABEL: v_fneg_negk_maxnum_f16_no_ieee:
1680; GFX11:       ; %bb.0:
1681; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1682; GFX11-NEXT:    v_min_f16_e64 v0, -v0, 4.0
1683; GFX11-NEXT:    s_setpc_b64 s[30:31]
1684  %max = call half @llvm.maxnum.f16(half -4.0, half %a)
1685  %fneg = fneg half %max
1686  ret half %fneg
1687}
1688
; fneg(maxnum(+0.0, a)) where the maxnum call carries the nnan flag, under
; attribute group #0 (defined outside this chunk).
; Unlike the nonzero-constant tests, the checks expect the max against 0 to be
; kept as-is, with the negation done by a separate sign-bit xor afterwards.
1689define half @v_fneg_0_maxnum_f16(half %a) #0 {
1690; SI-LABEL: v_fneg_0_maxnum_f16:
1691; SI:       ; %bb.0:
1692; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1693; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1694; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1695; SI-NEXT:    v_max_f32_e32 v0, 0, v0
1696; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1697; SI-NEXT:    s_setpc_b64 s[30:31]
1698;
1699; VI-LABEL: v_fneg_0_maxnum_f16:
1700; VI:       ; %bb.0:
1701; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1702; VI-NEXT:    v_max_f16_e32 v0, 0, v0
1703; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1704; VI-NEXT:    s_setpc_b64 s[30:31]
1705;
1706; GFX11-LABEL: v_fneg_0_maxnum_f16:
1707; GFX11:       ; %bb.0:
1708; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1709; GFX11-NEXT:    v_max_f16_e32 v0, 0, v0
1710; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1711; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1712; GFX11-NEXT:    s_setpc_b64 s[30:31]
1713  %max = call nnan half @llvm.maxnum.f16(half 0.0, half %a)
1714  %fneg = fneg half %max
1715  ret half %fneg
1716}
1717
; fneg(maxnum(-0.0, a)) under attribute group #0 (defined outside this chunk).
; The checks expect the negation to be folded. The result is a min of the
; negated input and 0. VI and GFX11 keep a v_max_f16 with negated sources
; ahead of the min (presumably IEEE-mode canonicalization -- confirm).
1718define half @v_fneg_neg0_maxnum_f16_ieee(half %a) #0 {
1719; SI-LABEL: v_fneg_neg0_maxnum_f16_ieee:
1720; SI:       ; %bb.0:
1721; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1722; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1723; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1724; SI-NEXT:    v_min_f32_e32 v0, 0, v0
1725; SI-NEXT:    s_setpc_b64 s[30:31]
1726;
1727; VI-LABEL: v_fneg_neg0_maxnum_f16_ieee:
1728; VI:       ; %bb.0:
1729; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1730; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
1731; VI-NEXT:    v_min_f16_e32 v0, 0, v0
1732; VI-NEXT:    s_setpc_b64 s[30:31]
1733;
1734; GFX11-LABEL: v_fneg_neg0_maxnum_f16_ieee:
1735; GFX11:       ; %bb.0:
1736; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1737; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
1738; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1739; GFX11-NEXT:    v_min_f16_e32 v0, 0, v0
1740; GFX11-NEXT:    s_setpc_b64 s[30:31]
1741  %max = call half @llvm.maxnum.f16(half -0.0, half %a)
1742  %fneg = fneg half %max
1743  ret half %fneg
1744}
1745
; fneg(maxnum(-0.0, a)) under attribute group #4 (defined outside this chunk;
; the _no_ieee suffix suggests IEEE mode is disabled there -- confirm).
; The checks expect a single min of the negated input and 0. VI and GFX11
; need only one v_min_f16 with a source modifier.
1746define half @v_fneg_neg0_maxnum_f16_no_ieee(half %a) #4 {
1747; SI-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
1748; SI:       ; %bb.0:
1749; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1750; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
1751; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1752; SI-NEXT:    v_min_f32_e32 v0, 0, v0
1753; SI-NEXT:    s_setpc_b64 s[30:31]
1754;
1755; VI-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
1756; VI:       ; %bb.0:
1757; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1758; VI-NEXT:    v_min_f16_e64 v0, -v0, 0
1759; VI-NEXT:    s_setpc_b64 s[30:31]
1760;
1761; GFX11-LABEL: v_fneg_neg0_maxnum_f16_no_ieee:
1762; GFX11:       ; %bb.0:
1763; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1764; GFX11-NEXT:    v_min_f16_e64 v0, -v0, 0
1765; GFX11-NEXT:    s_setpc_b64 s[30:31]
1766  %max = call half @llvm.maxnum.f16(half -0.0, half %a)
1767  %fneg = fneg half %max
1768  ret half %fneg
1769}
1770
; fneg(maxnum(+0.0, a)) * b under attribute group #0 (defined outside this
; chunk). The fneg result feeds an fmul, which can absorb a negation.
; The checks expect the max against 0 to stay unchanged and the negation to
; appear as a source modifier on the multiply, with no separate xor. VI and
; GFX11 also emit v_max_f16 v0, v0, v0 first (presumably IEEE-mode
; canonicalization of the input -- confirm).
1771define half @v_fneg_0_maxnum_foldable_use_f16_ieee(half %a, half %b) #0 {
1772; SI-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
1773; SI:       ; %bb.0:
1774; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1775; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1776; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1777; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1778; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1779; SI-NEXT:    v_max_f32_e32 v0, 0, v0
1780; SI-NEXT:    v_mul_f32_e64 v0, -v0, v1
1781; SI-NEXT:    s_setpc_b64 s[30:31]
1782;
1783; VI-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
1784; VI:       ; %bb.0:
1785; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1786; VI-NEXT:    v_max_f16_e32 v0, v0, v0
1787; VI-NEXT:    v_max_f16_e32 v0, 0, v0
1788; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
1789; VI-NEXT:    s_setpc_b64 s[30:31]
1790;
1791; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_ieee:
1792; GFX11:       ; %bb.0:
1793; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1794; GFX11-NEXT:    v_max_f16_e32 v0, v0, v0
1795; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1796; GFX11-NEXT:    v_max_f16_e32 v0, 0, v0
1797; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
1798; GFX11-NEXT:    s_setpc_b64 s[30:31]
1799  %max = call half @llvm.maxnum.f16(half 0.0, half %a)
1800  %fneg = fneg half %max
1801  %mul = fmul half %fneg, %b
1802  ret half %mul
1803}
1804
; fneg(maxnum(+0.0, a)) * b under attribute group #4 (defined outside this
; chunk; the _no_ieee suffix suggests IEEE mode is disabled there -- confirm).
; The checks expect the max against 0 to stay unchanged and the negation to
; be folded into the multiply as a source modifier. No extra v_max_f16 of the
; input with itself is expected here.
1805define half @v_fneg_0_maxnum_foldable_use_f16_no_ieee(half %a, half %b) #4 {
1806; SI-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
1807; SI:       ; %bb.0:
1808; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1809; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1810; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1811; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1812; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1813; SI-NEXT:    v_max_f32_e32 v0, 0, v0
1814; SI-NEXT:    v_mul_f32_e64 v0, -v0, v1
1815; SI-NEXT:    s_setpc_b64 s[30:31]
1816;
1817; VI-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
1818; VI:       ; %bb.0:
1819; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1820; VI-NEXT:    v_max_f16_e32 v0, 0, v0
1821; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
1822; VI-NEXT:    s_setpc_b64 s[30:31]
1823;
1824; GFX11-LABEL: v_fneg_0_maxnum_foldable_use_f16_no_ieee:
1825; GFX11:       ; %bb.0:
1826; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1827; GFX11-NEXT:    v_max_f16_e32 v0, 0, v0
1828; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1829; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
1830; GFX11-NEXT:    s_setpc_b64 s[30:31]
1831  %max = call half @llvm.maxnum.f16(half 0.0, half %a)
1832  %fneg = fneg half %max
1833  %mul = fmul half %fneg, %b
1834  ret half %mul
1835}
1836
; Returns { fneg(maxnum(a, b)), maxnum(a, b) * 4.0 } under attribute group #0
; (defined outside this chunk). The maxnum has a second, non-negated use.
; The checks expect the negated form to be computed once as a min of negated
; operands. The second result is then recovered by multiplying that value by
; -4.0 instead of keeping a separate max.
1837define { half, half } @v_fneg_maxnum_multi_use_maxnum_f16_ieee(half %a, half %b) #0 {
1838; SI-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
1839; SI:       ; %bb.0:
1840; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1841; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1842; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1843; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
1844; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
1845; SI-NEXT:    v_min_f32_e32 v0, v0, v1
1846; SI-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1847; SI-NEXT:    s_setpc_b64 s[30:31]
1848;
1849; VI-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
1850; VI:       ; %bb.0:
1851; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1852; VI-NEXT:    v_max_f16_e64 v1, -v1, -v1
1853; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
1854; VI-NEXT:    v_min_f16_e32 v0, v0, v1
1855; VI-NEXT:    v_mul_f16_e32 v1, -4.0, v0
1856; VI-NEXT:    s_setpc_b64 s[30:31]
1857;
1858; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_ieee:
1859; GFX11:       ; %bb.0:
1860; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1861; GFX11-NEXT:    v_max_f16_e64 v1, -v1, -v1
1862; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
1863; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1864; GFX11-NEXT:    v_min_f16_e32 v0, v0, v1
1865; GFX11-NEXT:    v_mul_f16_e32 v1, -4.0, v0
1866; GFX11-NEXT:    s_setpc_b64 s[30:31]
1867  %max = call half @llvm.maxnum.f16(half %a, half %b)
1868  %fneg = fneg half %max
1869  %use1 = fmul half %max, 4.0
1870  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
1871  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
1872  ret { half, half } %insert.1
1873}
1874
; Returns <fneg(maxnum(a, b)), maxnum(a, b) * 4.0> packed in a <2 x half>,
; under attribute group #4 (defined outside this chunk; the _no_ieee suffix
; suggests IEEE mode is disabled there -- confirm).
; On SI and VI the checks expect the negated min form, with the second lane
; recovered by multiplying by -4.0 (0xc400 on VI). On GFX11 the max is kept,
; and the negation is folded into v_pack_b32_f16 as a source modifier.
; The vector is built from poison, matching the other aggregate-returning
; tests in this file. Both lanes are overwritten, so the base value is never
; observed.
1875define <2 x half> @v_fneg_maxnum_multi_use_maxnum_f16_no_ieee(half %a, half %b) #4 {
1876; SI-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
1877; SI:       ; %bb.0:
1878; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1880; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1881; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
1882; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
1883; SI-NEXT:    v_min_f32_e32 v0, v0, v1
1884; SI-NEXT:    v_mul_f32_e32 v1, -4.0, v0
1885; SI-NEXT:    s_setpc_b64 s[30:31]
1886;
1887; VI-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
1888; VI:       ; %bb.0:
1889; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1890; VI-NEXT:    v_min_f16_e64 v0, -v0, -v1
1891; VI-NEXT:    v_mov_b32_e32 v1, 0xc400
1892; VI-NEXT:    v_mul_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1893; VI-NEXT:    v_or_b32_e32 v0, v0, v1
1894; VI-NEXT:    s_setpc_b64 s[30:31]
1895;
1896; GFX11-LABEL: v_fneg_maxnum_multi_use_maxnum_f16_no_ieee:
1897; GFX11:       ; %bb.0:
1898; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1899; GFX11-NEXT:    v_max_f16_e32 v0, v0, v1
1900; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1901; GFX11-NEXT:    v_mul_f16_e32 v1, 4.0, v0
1902; GFX11-NEXT:    v_pack_b32_f16 v0, -v0, v1
1903; GFX11-NEXT:    s_setpc_b64 s[30:31]
1904  %max = call half @llvm.maxnum.f16(half %a, half %b)
1905  %fneg = fneg half %max
1906  %use1 = fmul half %max, 4.0
1907  %ins0 = insertelement <2 x half> poison, half %fneg, i32 0
1908  %ins1 = insertelement <2 x half> %ins0, half %use1, i32 1
1909  ret <2 x half> %ins1
1910}
1911
1912; --------------------------------------------------------------------------------
1913; fma tests
1914; --------------------------------------------------------------------------------
1915
; fneg(fma(a, b, c)) under attribute group #0 (defined outside this chunk).
; In the -SAFE runs (default flags) the checks expect the fma followed by a
; separate sign-bit xor. GFX11 selects v_fmac_f16 there.
; In the -NSZ runs (-enable-no-signed-zeros-fp-math) the negation is expected
; to fold into the fma as source modifiers on b and c, with no xor.
1916define half @v_fneg_fma_f16(half %a, half %b, half %c) #0 {
1917; SI-SAFE-LABEL: v_fneg_fma_f16:
1918; SI-SAFE:       ; %bb.0:
1919; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1920; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
1921; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
1922; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
1923; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
1924; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
1925; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
1926; SI-SAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
1927; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
1928; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
1929;
1930; SI-NSZ-LABEL: v_fneg_fma_f16:
1931; SI-NSZ:       ; %bb.0:
1932; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1933; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
1934; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
1935; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
1936; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
1937; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
1938; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
1939; SI-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
1940; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
1941;
1942; VI-SAFE-LABEL: v_fneg_fma_f16:
1943; VI-SAFE:       ; %bb.0:
1944; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1945; VI-SAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
1946; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
1947; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
1948;
1949; VI-NSZ-LABEL: v_fneg_fma_f16:
1950; VI-NSZ:       ; %bb.0:
1951; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1952; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
1953; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
1954;
1955; GFX11-SAFE-LABEL: v_fneg_fma_f16:
1956; GFX11-SAFE:       ; %bb.0:
1957; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1958; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, v0, v1
1959; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1960; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v2
1961; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
1962;
1963; GFX11-NSZ-LABEL: v_fneg_fma_f16:
1964; GFX11-NSZ:       ; %bb.0:
1965; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1966; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
1967; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
1968  %fma = call half @llvm.fma.f16(half %a, half %b, half %c)
1969  %fneg = fneg half %fma
1970  ret half %fneg
1971}
1972
; Returns { fneg(fma(a, b, c)), fma(a, b, c) } under attribute group #0
; (defined outside this chunk). The un-negated fma is itself a result.
; The checks use the common per-target prefixes, so the same output is
; expected with and without -enable-no-signed-zeros-fp-math. The fma is
; computed once into v1 and the negated copy is produced by a sign-bit xor
; into v0.
1973define { half, half } @v_fneg_fma_store_use_fma_f16(half %a, half %b, half %c) #0 {
1974; SI-LABEL: v_fneg_fma_store_use_fma_f16:
1975; SI:       ; %bb.0:
1976; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1977; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1978; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1979; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1980; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1981; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1982; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1983; SI-NEXT:    v_fma_f32 v1, v0, v1, v2
1984; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
1985; SI-NEXT:    s_setpc_b64 s[30:31]
1986;
1987; VI-LABEL: v_fneg_fma_store_use_fma_f16:
1988; VI:       ; %bb.0:
1989; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1990; VI-NEXT:    v_fma_f16 v1, v0, v1, v2
1991; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
1992; VI-NEXT:    s_setpc_b64 s[30:31]
1993;
1994; GFX11-LABEL: v_fneg_fma_store_use_fma_f16:
1995; GFX11:       ; %bb.0:
1996; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1997; GFX11-NEXT:    v_fma_f16 v1, v0, v1, v2
1998; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1999; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
2000; GFX11-NEXT:    s_setpc_b64 s[30:31]
2001  %fma = call half @llvm.fma.f16(half %a, half %b, half %c)
2002  %fneg = fneg half %fma
2003  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
2004  %insert.1 = insertvalue { half, half } %insert.0, half %fma, 1
2005  ret { half, half } %insert.1
2006}
2007
; Returns { fneg(fma(a, b, c)), fma(a, b, c) * 4.0 } under attribute group #0
; (defined outside this chunk). The fma has a second use that can absorb a
; negation.
; In the -SAFE runs the checks expect fma, then xor for the first result, then
; multiply by 4.0. In the -NSZ runs they expect the negated fma to be computed
; directly (modifiers on b and c) and the second result to be obtained by
; multiplying that value by -4.0.
2008define { half, half } @v_fneg_fma_multi_use_fma_f16(half %a, half %b, half %c) #0 {
2009; SI-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16:
2010; SI-SAFE:       ; %bb.0:
2011; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2012; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2013; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2014; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2015; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2016; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2017; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2018; SI-SAFE-NEXT:    v_fma_f32 v1, v0, v1, v2
2019; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
2020; SI-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
2021; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2022;
2023; SI-NSZ-LABEL: v_fneg_fma_multi_use_fma_f16:
2024; SI-NSZ:       ; %bb.0:
2025; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2026; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2027; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2028; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2029; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2030; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2031; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2032; SI-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
2033; SI-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
2034; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2035;
2036; VI-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16:
2037; VI-SAFE:       ; %bb.0:
2038; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2039; VI-SAFE-NEXT:    v_fma_f16 v1, v0, v1, v2
2040; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
2041; VI-SAFE-NEXT:    v_mul_f16_e32 v1, 4.0, v1
2042; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2043;
2044; VI-NSZ-LABEL: v_fneg_fma_multi_use_fma_f16:
2045; VI-NSZ:       ; %bb.0:
2046; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2047; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2048; VI-NSZ-NEXT:    v_mul_f16_e32 v1, -4.0, v0
2049; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2050;
2051; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fma_f16:
2052; GFX11-SAFE:       ; %bb.0:
2053; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2054; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, v0, v1
2055; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2056; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v2
2057; GFX11-SAFE-NEXT:    v_mul_f16_e32 v1, 4.0, v2
2058; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2059;
2060; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fma_f16:
2061; GFX11-NSZ:       ; %bb.0:
2062; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2063; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2064; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2065; GFX11-NSZ-NEXT:    v_mul_f16_e32 v1, -4.0, v0
2066; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2067  %fma = call half @llvm.fma.f16(half %a, half %b, half %c)
2068  %fneg = fneg half %fma
2069  %use1 = fmul half %fma, 4.0
2070  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
2071  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
2072  ret { half, half } %insert.1
2073}
2074
; fneg(fma(-a, b, c)) under attribute group #0 (defined outside this chunk).
; In the -SAFE runs the checks expect the inner negation as a source modifier
; on a, with the outer negation done by a separate xor.
; In the -NSZ runs both negations are expected to combine into a single fma
; with only c negated, i.e. fma(a, b, -c).
2075define half @v_fneg_fma_fneg_x_y_f16(half %a, half %b, half %c) #0 {
2076; SI-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16:
2077; SI-SAFE:       ; %bb.0:
2078; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2079; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2080; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2081; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2082; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2083; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2084; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2085; SI-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, v2
2086; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2087; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2088;
2089; SI-NSZ-LABEL: v_fneg_fma_fneg_x_y_f16:
2090; SI-NSZ:       ; %bb.0:
2091; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2092; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2093; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2094; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2095; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2096; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2097; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2098; SI-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
2099; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2100;
2101; VI-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16:
2102; VI-SAFE:       ; %bb.0:
2103; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2104; VI-SAFE-NEXT:    v_fma_f16 v0, -v0, v1, v2
2105; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2106; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2107;
2108; VI-NSZ-LABEL: v_fneg_fma_fneg_x_y_f16:
2109; VI-NSZ:       ; %bb.0:
2110; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2111; VI-NSZ-NEXT:    v_fma_f16 v0, v0, v1, -v2
2112; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2113;
2114; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_y_f16:
2115; GFX11-SAFE:       ; %bb.0:
2116; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2117; GFX11-SAFE-NEXT:    v_fma_f16 v0, -v0, v1, v2
2118; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2119; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2120; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2121;
2122; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_y_f16:
2123; GFX11-NSZ:       ; %bb.0:
2124; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2125; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, v1, -v2
2126; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2127  %fneg.a = fneg half %a
2128  %fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
2129  %fneg = fneg half %fma
2130  ret half %fneg
2131}
2132
; fneg(fma(a, -b, c)) under attribute group #0 (defined outside this chunk).
; In the -SAFE runs the checks expect the inner negation as a source modifier
; on b, with the outer negation done by a separate xor.
; In the -NSZ runs both negations are expected to combine into a single fma
; with only c negated, i.e. fma(a, b, -c).
2133define half @v_fneg_fma_x_fneg_y_f16(half %a, half %b, half %c) #0 {
2134; SI-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16:
2135; SI-SAFE:       ; %bb.0:
2136; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2137; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2138; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2139; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2140; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2141; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2142; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2143; SI-SAFE-NEXT:    v_fma_f32 v0, v0, -v1, v2
2144; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2145; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2146;
2147; SI-NSZ-LABEL: v_fneg_fma_x_fneg_y_f16:
2148; SI-NSZ:       ; %bb.0:
2149; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2150; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2151; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2152; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2153; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2154; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2155; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2156; SI-NSZ-NEXT:    v_fma_f32 v0, v0, v1, -v2
2157; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2158;
2159; VI-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16:
2160; VI-SAFE:       ; %bb.0:
2161; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2162; VI-SAFE-NEXT:    v_fma_f16 v0, v0, -v1, v2
2163; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2164; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2165;
2166; VI-NSZ-LABEL: v_fneg_fma_x_fneg_y_f16:
2167; VI-NSZ:       ; %bb.0:
2168; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2169; VI-NSZ-NEXT:    v_fma_f16 v0, v0, v1, -v2
2170; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2171;
2172; GFX11-SAFE-LABEL: v_fneg_fma_x_fneg_y_f16:
2173; GFX11-SAFE:       ; %bb.0:
2174; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2175; GFX11-SAFE-NEXT:    v_fma_f16 v0, v0, -v1, v2
2176; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2177; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2178; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2179;
2180; GFX11-NSZ-LABEL: v_fneg_fma_x_fneg_y_f16:
2181; GFX11-NSZ:       ; %bb.0:
2182; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2183; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, v1, -v2
2184; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2185  %fneg.b = fneg half %b
2186  %fma = call half @llvm.fma.f16(half %a, half %fneg.b, half %c)
2187  %fneg = fneg half %fma
2188  ret half %fneg
2189}
2190
; fneg(fma(-a, -b, c)) under attribute group #0 (defined outside this chunk).
; The two operand negations cancel, so the expected output matches the plain
; fneg(fma(a, b, c)) case. The -SAFE runs expect an unmodified fma (v_fmac_f16
; on GFX11) plus a sign-bit xor. The -NSZ runs expect one fma with b and c
; negated.
2191define half @v_fneg_fma_fneg_fneg_y_f16(half %a, half %b, half %c) #0 {
2192; SI-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16:
2193; SI-SAFE:       ; %bb.0:
2194; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2196; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2197; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2198; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2199; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2200; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2201; SI-SAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
2202; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2203; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2204;
2205; SI-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f16:
2206; SI-NSZ:       ; %bb.0:
2207; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2208; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2209; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2210; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2211; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2212; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2213; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2214; SI-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, -v2
2215; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2216;
2217; VI-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16:
2218; VI-SAFE:       ; %bb.0:
2219; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2220; VI-SAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
2221; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2222; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2223;
2224; VI-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f16:
2225; VI-NSZ:       ; %bb.0:
2226; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2227; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2228; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2229;
2230; GFX11-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f16:
2231; GFX11-SAFE:       ; %bb.0:
2232; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2233; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, v0, v1
2234; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2235; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v2
2236; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2237;
2238; GFX11-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f16:
2239; GFX11-NSZ:       ; %bb.0:
2240; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2241; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2242; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2243  %fneg.a = fneg half %a
2244  %fneg.b = fneg half %b
2245  %fma = call half @llvm.fma.f16(half %fneg.a, half %fneg.b, half %c)
2246  %fneg = fneg half %fma
2247  ret half %fneg
2248}
2249
; fneg(fma(-a, b, -c)) under attribute group #0 (defined outside this chunk).
; In the -SAFE runs the checks expect both inner negations as source modifiers
; (on a and c), with the outer negation done by a separate xor.
; In the -NSZ runs all three negations are expected to cancel, leaving a plain
; fma(a, b, c) with no modifiers.
2250define half @v_fneg_fma_fneg_x_fneg_f16(half %a, half %b, half %c) #0 {
2251; SI-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16:
2252; SI-SAFE:       ; %bb.0:
2253; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2254; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2255; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2256; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2257; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2258; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2259; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2260; SI-SAFE-NEXT:    v_fma_f32 v0, -v0, v1, -v2
2261; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2262; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2263;
2264; SI-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f16:
2265; SI-NSZ:       ; %bb.0:
2266; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2267; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2268; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2269; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2270; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2271; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2272; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2273; SI-NSZ-NEXT:    v_fma_f32 v0, v0, v1, v2
2274; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2275;
2276; VI-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16:
2277; VI-SAFE:       ; %bb.0:
2278; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2279; VI-SAFE-NEXT:    v_fma_f16 v0, -v0, v1, -v2
2280; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2281; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2282;
2283; VI-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f16:
2284; VI-NSZ:       ; %bb.0:
2285; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286; VI-NSZ-NEXT:    v_fma_f16 v0, v0, v1, v2
2287; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2288;
2289; GFX11-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f16:
2290; GFX11-SAFE:       ; %bb.0:
2291; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292; GFX11-SAFE-NEXT:    v_fma_f16 v0, -v0, v1, -v2
2293; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2294; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2295; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2296;
2297; GFX11-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f16:
2298; GFX11-NSZ:       ; %bb.0:
2299; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2300; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, v1, v2
2301; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2302  %fneg.a = fneg half %a
2303  %fneg.c = fneg half %c
2304  %fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %fneg.c)
2305  %fneg = fneg half %fma
2306  ret half %fneg
2307}
2308
; fneg(fma(a, b, -c)) under attribute group #0 (defined outside this chunk).
; In the -SAFE runs the checks expect the inner negation as a source modifier
; on c, with the outer negation done by a separate xor.
; In the -NSZ runs the negations are expected to combine into a single fma
; with only b negated, i.e. fma(a, -b, c).
2309define half @v_fneg_fma_x_y_fneg_f16(half %a, half %b, half %c) #0 {
2310; SI-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16:
2311; SI-SAFE:       ; %bb.0:
2312; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2314; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2315; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2316; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2317; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2318; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2319; SI-SAFE-NEXT:    v_fma_f32 v0, v0, v1, -v2
2320; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2321; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2322;
2323; SI-NSZ-LABEL: v_fneg_fma_x_y_fneg_f16:
2324; SI-NSZ:       ; %bb.0:
2325; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2326; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2327; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2328; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2329; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2330; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2331; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2332; SI-NSZ-NEXT:    v_fma_f32 v0, v0, -v1, v2
2333; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2334;
2335; VI-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16:
2336; VI-SAFE:       ; %bb.0:
2337; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2338; VI-SAFE-NEXT:    v_fma_f16 v0, v0, v1, -v2
2339; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2340; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2341;
2342; VI-NSZ-LABEL: v_fneg_fma_x_y_fneg_f16:
2343; VI-NSZ:       ; %bb.0:
2344; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2345; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, v2
2346; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2347;
2348; GFX11-SAFE-LABEL: v_fneg_fma_x_y_fneg_f16:
2349; GFX11-SAFE:       ; %bb.0:
2350; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2351; GFX11-SAFE-NEXT:    v_fma_f16 v0, v0, v1, -v2
2352; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2353; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2354; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2355;
2356; GFX11-NSZ-LABEL: v_fneg_fma_x_y_fneg_f16:
2357; GFX11-NSZ:       ; %bb.0:
2358; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2359; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, v2
2360; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2361  %fneg.c = fneg half %c
2362  %fma = call half @llvm.fma.f16(half %a, half %b, half %fneg.c)
2363  %fneg = fneg half %fma
2364  ret half %fneg
2365}
2366
; Computes fneg(fma(fneg(a), b, c)) on half and additionally returns fneg(a)
; itself as the second struct member, so the inner fneg has a use outside the fma.
; SAFE checks keep fma(-a, b, c) followed by a v_xor of the result. NSZ checks
; emit fma(a, b, -c) instead. In every configuration the returned fneg(a) is
; materialized separately (a v_xor, or a negating v_cvt in the SI NSZ checks).
2367define { half, half } @v_fneg_fma_store_use_fneg_x_y_f16(half %a, half %b, half %c) #0 {
2368; SI-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
2369; SI-SAFE:       ; %bb.0:
2370; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2371; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2372; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2373; SI-SAFE-NEXT:    v_cvt_f16_f32_e64 v3, -v0
2374; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2375; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v1
2376; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
2377; SI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v0
2378; SI-SAFE-NEXT:    v_fma_f32 v0, v3, v4, v2
2379; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2380; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2381;
2382; SI-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
2383; SI-NSZ:       ; %bb.0:
2384; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2385; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2386; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2387; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2388; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2389; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v1
2390; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v4, v0
2391; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v1, -v0
2392; SI-NSZ-NEXT:    v_fma_f32 v0, v4, v3, -v2
2393; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2394;
2395; VI-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
2396; VI-SAFE:       ; %bb.0:
2397; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398; VI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x8000, v0
2399; VI-SAFE-NEXT:    v_fma_f16 v0, -v0, v1, v2
2400; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2401; VI-SAFE-NEXT:    v_mov_b32_e32 v1, v3
2402; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2403;
2404; VI-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
2405; VI-NSZ:       ; %bb.0:
2406; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2407; VI-NSZ-NEXT:    v_xor_b32_e32 v3, 0x8000, v0
2408; VI-NSZ-NEXT:    v_fma_f16 v0, v0, v1, -v2
2409; VI-NSZ-NEXT:    v_mov_b32_e32 v1, v3
2410; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2411;
2412; GFX11-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
2413; GFX11-SAFE:       ; %bb.0:
2414; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2415; GFX11-SAFE-NEXT:    v_fma_f16 v1, -v0, v1, v2
2416; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2417; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v1
2418; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
2419; GFX11-SAFE-NEXT:    v_mov_b32_e32 v0, v2
2420; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2421;
2422; GFX11-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f16:
2423; GFX11-NSZ:       ; %bb.0:
2424; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2425; GFX11-NSZ-NEXT:    v_fma_f16 v2, v0, v1, -v2
2426; GFX11-NSZ-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
2427; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2428; GFX11-NSZ-NEXT:    v_mov_b32_e32 v0, v2
2429; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2430  %fneg.a = fneg half %a
2431  %fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
2432  %fneg = fneg half %fma
2433  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
2434  %insert.1 = insertvalue { half, half } %insert.0, half %fneg.a, 1
2435  ret { half, half } %insert.1
2436}
2437
; Computes fneg(fma(fneg(a), b, c)) on half, where fneg(a) is also an operand of
; a separate fmul with d. Both results are returned in a struct.
; SAFE checks keep fma(-a, b, c) followed by a v_xor. NSZ checks emit
; fma(a, b, -c). The fmul still consumes the negated a (as a source modifier on
; v_mul_f16 in the VI and GFX11 checks, via a negating v_cvt in the SI checks).
2438define { half, half } @v_fneg_fma_multi_use_fneg_x_y_f16(half %a, half %b, half %c, half %d) #0 {
2439; SI-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
2440; SI-SAFE:       ; %bb.0:
2441; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2442; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2443; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2444; SI-SAFE-NEXT:    v_cvt_f16_f32_e64 v0, -v0
2445; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
2446; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2447; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2448; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v0
2449; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
2450; SI-SAFE-NEXT:    v_fma_f32 v0, v4, v1, v2
2451; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
2452; SI-SAFE-NEXT:    v_mul_f32_e32 v1, v4, v3
2453; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2454;
2455; SI-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
2456; SI-NSZ:       ; %bb.0:
2457; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2458; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v3, v3
2459; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2460; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2461; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2462; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v3
2463; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2464; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2465; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v4, v0
2466; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v5, -v0
2467; SI-NSZ-NEXT:    v_fma_f32 v0, v4, v1, -v2
2468; SI-NSZ-NEXT:    v_mul_f32_e32 v1, v5, v3
2469; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2470;
2471; VI-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
2472; VI-SAFE:       ; %bb.0:
2473; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474; VI-SAFE-NEXT:    v_fma_f16 v1, -v0, v1, v2
2475; VI-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v1
2476; VI-SAFE-NEXT:    v_mul_f16_e64 v1, -v0, v3
2477; VI-SAFE-NEXT:    v_mov_b32_e32 v0, v2
2478; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2479;
2480; VI-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
2481; VI-NSZ:       ; %bb.0:
2482; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2483; VI-NSZ-NEXT:    v_fma_f16 v2, v0, v1, -v2
2484; VI-NSZ-NEXT:    v_mul_f16_e64 v1, -v0, v3
2485; VI-NSZ-NEXT:    v_mov_b32_e32 v0, v2
2486; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2487;
2488; GFX11-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
2489; GFX11-SAFE:       ; %bb.0:
2490; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2491; GFX11-SAFE-NEXT:    v_fma_f16 v1, -v0, v1, v2
2492; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2493; GFX11-SAFE-NEXT:    v_xor_b32_e32 v2, 0x8000, v1
2494; GFX11-SAFE-NEXT:    v_mul_f16_e64 v1, -v0, v3
2495; GFX11-SAFE-NEXT:    v_mov_b32_e32 v0, v2
2496; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2497;
2498; GFX11-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f16:
2499; GFX11-NSZ:       ; %bb.0:
2500; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2501; GFX11-NSZ-NEXT:    v_fma_f16 v2, v0, v1, -v2
2502; GFX11-NSZ-NEXT:    v_mul_f16_e64 v1, -v0, v3
2503; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2504; GFX11-NSZ-NEXT:    v_mov_b32_e32 v0, v2
2505; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2506  %fneg.a = fneg half %a
2507  %fma = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
2508  %fneg = fneg half %fma
2509  %use1 = fmul half %fneg.a, %d
2510  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
2511  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
2512  ret { half, half } %insert.1
2513}
2514
2515; --------------------------------------------------------------------------------
2516; fmad tests
2517; --------------------------------------------------------------------------------
2518
; Computes fneg(llvm.fmuladd(a, b, c)) on half.
; SAFE checks show a multiply-add (v_mac_f32, v_fma_f16 or v_fmac_f16 depending
; on the target) followed by a v_xor sign-bit flip. NSZ checks show no v_xor.
; Instead b and c are negated, as source modifiers on the fma/mad or, for b in
; the SI checks, during the f32 to f16 conversion.
2519define half @v_fneg_fmad_f16(half %a, half %b, half %c) #0 {
2520; SI-SAFE-LABEL: v_fneg_fmad_f16:
2521; SI-SAFE:       ; %bb.0:
2522; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2523; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2524; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2525; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2526; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2527; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2528; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2529; SI-SAFE-NEXT:    v_mac_f32_e32 v2, v0, v1
2530; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v2
2531; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2532;
2533; SI-NSZ-LABEL: v_fneg_fmad_f16:
2534; SI-NSZ:       ; %bb.0:
2535; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2536; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2537; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2538; SI-NSZ-NEXT:    v_cvt_f16_f32_e64 v1, -v1
2539; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2540; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2541; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2542; SI-NSZ-NEXT:    v_mad_f32 v0, v0, v1, -v2
2543; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2544;
2545; VI-SAFE-LABEL: v_fneg_fmad_f16:
2546; VI-SAFE:       ; %bb.0:
2547; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2548; VI-SAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
2549; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2550; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2551;
2552; VI-NSZ-LABEL: v_fneg_fmad_f16:
2553; VI-NSZ:       ; %bb.0:
2554; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2555; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2556; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2557;
2558; GFX11-SAFE-LABEL: v_fneg_fmad_f16:
2559; GFX11-SAFE:       ; %bb.0:
2560; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2561; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, v0, v1
2562; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2563; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v2
2564; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2565;
2566; GFX11-NSZ-LABEL: v_fneg_fmad_f16:
2567; GFX11-NSZ:       ; %bb.0:
2568; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2569; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2570; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2571  %fma = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
2572  %fneg = fneg half %fma
2573  ret half %fneg
2574}
2575
; Vector form of the fmuladd case, computing fneg(llvm.fmuladd(a, b, c)) on
; <4 x half>.
; SAFE checks perform the multiply-adds and then flip the sign bits with v_xor
; (0x80008000 on the packed registers in the VI and GFX11 checks). NSZ checks
; negate the b and c operands instead (neg_lo/neg_hi on v_pk_fma_f16 in the GFX11
; checks) and contain no v_xor.
; NOTE(review): the function name says v4f32 although every operand is
; <4 x half>. The name is presumably inherited from the f32 variant of this
; test. Renaming would also require regenerating the LABEL checks, so it is
; left unchanged here.
2576define <4 x half> @v_fneg_fmad_v4f32(<4 x half> %a, <4 x half> %b, <4 x half> %c) #0 {
2577; SI-SAFE-LABEL: v_fneg_fmad_v4f32:
2578; SI-SAFE:       ; %bb.0:
2579; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2580; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v8, v8
2581; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v4, v4
2582; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v9, v9
2583; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2584; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v5, v5
2585; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v10, v10
2586; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v6, v6
2587; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2588; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v11, v11
2589; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v7, v7
2590; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v3
2591; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2592; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v8, v8
2593; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v4, v4
2594; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v9, v9
2595; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v5, v5
2596; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v10, v10
2597; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v6, v6
2598; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v11, v11
2599; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v7, v7
2600; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
2601; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2602; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2603; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2604; SI-SAFE-NEXT:    v_mac_f32_e32 v11, v3, v7
2605; SI-SAFE-NEXT:    v_mac_f32_e32 v10, v2, v6
2606; SI-SAFE-NEXT:    v_mac_f32_e32 v9, v1, v5
2607; SI-SAFE-NEXT:    v_mac_f32_e32 v8, v0, v4
2608; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v8
2609; SI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80000000, v9
2610; SI-SAFE-NEXT:    v_xor_b32_e32 v2, 0x80000000, v10
2611; SI-SAFE-NEXT:    v_xor_b32_e32 v3, 0x80000000, v11
2612; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2613;
2614; SI-NSZ-LABEL: v_fneg_fmad_v4f32:
2615; SI-NSZ:       ; %bb.0:
2616; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2617; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v11, v11
2618; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v3, v3
2619; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v10, v10
2620; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v7, v7
2621; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2622; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v9, v9
2623; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2624; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v6, v6
2625; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v8, v8
2626; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2627; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v4, v4
2628; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v5, v5
2629; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v11, v11
2630; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v3
2631; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v10, v10
2632; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2633; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v9, v9
2634; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
2635; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v8, v8
2636; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2637; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v4, -v4
2638; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v5, -v5
2639; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v6, -v6
2640; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v7, -v7
2641; SI-NSZ-NEXT:    v_mad_f32 v0, v0, v4, -v8
2642; SI-NSZ-NEXT:    v_mad_f32 v1, v1, v5, -v9
2643; SI-NSZ-NEXT:    v_mad_f32 v2, v2, v6, -v10
2644; SI-NSZ-NEXT:    v_mad_f32 v3, v3, v7, -v11
2645; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2646;
2647; VI-SAFE-LABEL: v_fneg_fmad_v4f32:
2648; VI-SAFE:       ; %bb.0:
2649; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2650; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
2651; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
2652; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
2653; VI-SAFE-NEXT:    v_fma_f16 v6, v8, v7, v6
2654; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
2655; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
2656; VI-SAFE-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
2657; VI-SAFE-NEXT:    v_fma_f16 v7, v9, v8, v7
2658; VI-SAFE-NEXT:    v_fma_f16 v0, v0, v2, v4
2659; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
2660; VI-SAFE-NEXT:    v_fma_f16 v1, v1, v3, v5
2661; VI-SAFE-NEXT:    v_or_b32_e32 v0, v0, v2
2662; VI-SAFE-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
2663; VI-SAFE-NEXT:    v_or_b32_e32 v1, v1, v2
2664; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
2665; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
2666; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2667;
2668; VI-NSZ-LABEL: v_fneg_fmad_v4f32:
2669; VI-NSZ:       ; %bb.0:
2670; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2671; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v6, 16, v5
2672; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
2673; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v8, 16, v1
2674; VI-NSZ-NEXT:    v_fma_f16 v6, v8, -v7, -v6
2675; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v7, 16, v4
2676; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
2677; VI-NSZ-NEXT:    v_lshrrev_b32_e32 v9, 16, v0
2678; VI-NSZ-NEXT:    v_fma_f16 v7, v9, -v8, -v7
2679; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v2, -v4
2680; VI-NSZ-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
2681; VI-NSZ-NEXT:    v_fma_f16 v1, v1, -v3, -v5
2682; VI-NSZ-NEXT:    v_or_b32_e32 v0, v0, v2
2683; VI-NSZ-NEXT:    v_lshlrev_b32_e32 v2, 16, v6
2684; VI-NSZ-NEXT:    v_or_b32_e32 v1, v1, v2
2685; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2686;
2687; GFX11-SAFE-LABEL: v_fneg_fmad_v4f32:
2688; GFX11-SAFE:       ; %bb.0:
2689; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2690; GFX11-SAFE-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
2691; GFX11-SAFE-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
2692; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
2693; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
2694; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
2695; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2696;
2697; GFX11-NSZ-LABEL: v_fneg_fmad_v4f32:
2698; GFX11-NSZ:       ; %bb.0:
2699; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2700; GFX11-NSZ-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
2701; GFX11-NSZ-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
2702; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2703  %fma = call <4 x half> @llvm.fmuladd.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
2704  %fneg = fneg <4 x half> %fma
2705  ret <4 x half> %fneg
2706}
2707
; The llvm.fmuladd result has two users, an fneg and an fmul by 4.0, and both
; values are returned in a struct.
; SAFE checks compute the multiply-add once, v_xor it for the first result and
; multiply the un-negated value by 4.0. NSZ checks compute the negated
; multiply-add directly (b and c negated) and multiply that by -4.0 instead.
2708define { half, half } @v_fneg_fmad_multi_use_fmad_f16(half %a, half %b, half %c) #0 {
2709; SI-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16:
2710; SI-SAFE:       ; %bb.0:
2711; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2712; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
2713; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
2714; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
2715; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
2716; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
2717; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
2718; SI-SAFE-NEXT:    v_mac_f32_e32 v2, v0, v1
2719; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v2
2720; SI-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v2
2721; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2722;
2723; SI-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f16:
2724; SI-NSZ:       ; %bb.0:
2725; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2726; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
2727; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
2728; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
2729; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
2730; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
2731; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v1, -v1
2732; SI-NSZ-NEXT:    v_mad_f32 v0, v0, v1, -v2
2733; SI-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v0
2734; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2735;
2736; VI-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16:
2737; VI-SAFE:       ; %bb.0:
2738; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2739; VI-SAFE-NEXT:    v_fma_f16 v1, v0, v1, v2
2740; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
2741; VI-SAFE-NEXT:    v_mul_f16_e32 v1, 4.0, v1
2742; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
2743;
2744; VI-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f16:
2745; VI-NSZ:       ; %bb.0:
2746; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2747; VI-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2748; VI-NSZ-NEXT:    v_mul_f16_e32 v1, -4.0, v0
2749; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
2750;
2751; GFX11-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f16:
2752; GFX11-SAFE:       ; %bb.0:
2753; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2754; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, v0, v1
2755; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2756; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v2
2757; GFX11-SAFE-NEXT:    v_mul_f16_e32 v1, 4.0, v2
2758; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
2759;
2760; GFX11-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f16:
2761; GFX11-NSZ:       ; %bb.0:
2762; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2763; GFX11-NSZ-NEXT:    v_fma_f16 v0, v0, -v1, -v2
2764; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2765; GFX11-NSZ-NEXT:    v_mul_f16_e32 v1, -4.0, v0
2766; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
2767  %fma = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
2768  %fneg = fneg half %fma
2769  %use1 = fmul half %fma, 4.0
2770  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
2771  %insert.1 = insertvalue { half, half } %insert.0, half %use1, 1
2772  ret { half, half } %insert.1
2773}
2774
2775; --------------------------------------------------------------------------------
2776; fp_extend tests
2777; --------------------------------------------------------------------------------
2778
; Computes fneg(fpext half a to double).
; The SI checks fold the negation into a single v_cvt_f64_f32 with a negated
; source. The VI and GFX11 checks flip the half sign bit with v_xor first and
; then convert f16 to f32 to f64. The same checks apply with and without
; -enable-no-signed-zeros-fp-math (shared SI, VI and GFX11 prefixes).
2779define double @v_fneg_fp_extend_f16_to_f64(half %a) #0 {
2780; SI-LABEL: v_fneg_fp_extend_f16_to_f64:
2781; SI:       ; %bb.0:
2782; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2783; SI-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
2784; SI-NEXT:    s_setpc_b64 s[30:31]
2785;
2786; VI-LABEL: v_fneg_fp_extend_f16_to_f64:
2787; VI:       ; %bb.0:
2788; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2789; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2790; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2791; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2792; VI-NEXT:    s_setpc_b64 s[30:31]
2793;
2794; GFX11-LABEL: v_fneg_fp_extend_f16_to_f64:
2795; GFX11:       ; %bb.0:
2796; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2797; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2798; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2799; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
2800; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2801; GFX11-NEXT:    s_setpc_b64 s[30:31]
2802  %fpext = fpext half %a to double
2803  %fneg = fneg double %fpext
2804  ret double %fneg
2805}
2806
; Computes fneg(fpext(fneg(a))) from half to double.
; The two negations cancel. The checks for every target contain only the
; conversion instructions, with no v_xor and no negated source operand.
2807define double @v_fneg_fp_extend_fneg_f16_to_f64(half %a) #0 {
2808; SI-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
2809; SI:       ; %bb.0:
2810; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2811; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2812; SI-NEXT:    s_setpc_b64 s[30:31]
2813;
2814; VI-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
2815; VI:       ; %bb.0:
2816; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2817; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2818; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2819; VI-NEXT:    s_setpc_b64 s[30:31]
2820;
2821; GFX11-LABEL: v_fneg_fp_extend_fneg_f16_to_f64:
2822; GFX11:       ; %bb.0:
2823; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2824; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
2825; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2826; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2827; GFX11-NEXT:    s_setpc_b64 s[30:31]
2828  %fneg.a = fneg half %a
2829  %fpext = fpext half %fneg.a to double
2830  %fneg = fneg double %fpext
2831  ret double %fneg
2832}
2833
; Same fneg(fpext(fneg(a))) pattern from half to double, but fneg(a) is also
; returned as the second struct member.
; The checks convert the un-negated input (the two negations around the fpext
; cancel) and produce the returned fneg(a) with one separate v_xor.
2834define { double, half } @v_fneg_fp_extend_store_use_fneg_f16_to_f64(half %a) #0 {
2835; SI-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
2836; SI:       ; %bb.0:
2837; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2838; SI-NEXT:    v_mov_b32_e32 v2, v0
2839; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
2840; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
2841; SI-NEXT:    s_setpc_b64 s[30:31]
2842;
2843; VI-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
2844; VI:       ; %bb.0:
2845; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2846; VI-NEXT:    v_mov_b32_e32 v2, v0
2847; VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2848; VI-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
2849; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2850; VI-NEXT:    s_setpc_b64 s[30:31]
2851;
2852; GFX11-LABEL: v_fneg_fp_extend_store_use_fneg_f16_to_f64:
2853; GFX11:       ; %bb.0:
2854; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2855; GFX11-NEXT:    v_mov_b32_e32 v2, v0
2856; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
2857; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v2
2858; GFX11-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
2859; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2860; GFX11-NEXT:    s_setpc_b64 s[30:31]
2861  %fneg.a = fneg half %a
2862  %fpext = fpext half %fneg.a to double
2863  %fneg = fneg double %fpext
2864  %insert.0 = insertvalue { double, half } poison, double %fneg, 0
2865  %insert.1 = insertvalue { double, half } %insert.0, half %fneg.a, 1
2866  ret { double, half } %insert.1
2867}
2868
; The fpext half to double result is returned twice, once negated and once
; unchanged.
; The checks perform the conversion once and build the negated copy by v_xor on
; the high dword (0x80000000) while copying the low dword with v_mov.
; NOTE(review): despite "fneg" appearing twice in the name, the IR below does not
; negate the input before the fpext. The name presumably mirrors the f32 variant
; of this test.
2869define { double, double } @v_fneg_multi_use_fp_extend_fneg_f16_to_f64(half %a) #0 {
2870; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
2871; SI:       ; %bb.0:
2872; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v0
2874; SI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
2875; SI-NEXT:    v_mov_b32_e32 v0, v2
2876; SI-NEXT:    s_setpc_b64 s[30:31]
2877;
2878; VI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
2879; VI:       ; %bb.0:
2880; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2881; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2882; VI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v0
2883; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
2884; VI-NEXT:    v_mov_b32_e32 v0, v2
2885; VI-NEXT:    s_setpc_b64 s[30:31]
2886;
2887; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f64:
2888; GFX11:       ; %bb.0:
2889; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2890; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
2891; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2892; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v0
2893; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v3
2894; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
2895; GFX11-NEXT:    v_mov_b32_e32 v0, v2
2896; GFX11-NEXT:    s_setpc_b64 s[30:31]
2897  %fpext = fpext half %a to double
2898  %fneg = fneg double %fpext
2899  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
2900  %insert.1 = insertvalue { double, double } %insert.0, double %fpext, 1
2901  ret { double, double } %insert.1
2902}
2903
; The fpext half to double result feeds both an fneg and an fmul by 4.0, and
; both values are returned in a struct.
; The checks convert once, multiply the un-negated double by 4.0 with v_mul_f64,
; and produce the negated value by v_xor on the high dword.
2904define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64(half %a) #0 {
2905; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
2906; SI:       ; %bb.0:
2907; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2908; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2909; SI-NEXT:    v_xor_b32_e32 v4, 0x80000000, v1
2910; SI-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
2911; SI-NEXT:    v_mov_b32_e32 v1, v4
2912; SI-NEXT:    s_setpc_b64 s[30:31]
2913;
2914; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
2915; VI:       ; %bb.0:
2916; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2917; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2918; VI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2919; VI-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
2920; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
2921; VI-NEXT:    s_setpc_b64 s[30:31]
2922;
2923; GFX11-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f64:
2924; GFX11:       ; %bb.0:
2925; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2926; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
2927; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2928; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
2929; GFX11-NEXT:    v_mul_f64 v[2:3], v[0:1], 4.0
2930; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
2931; GFX11-NEXT:    s_setpc_b64 s[30:31]
2932  %fpext = fpext half %a to double
2933  %fneg = fneg double %fpext
2934  %mul = fmul double %fpext, 4.0
2935  %insert.0 = insertvalue { double, double } poison, double %fneg, 0
2936  %insert.1 = insertvalue { double, double } %insert.0, double %mul, 1
2937  ret { double, double } %insert.1
2938}
2939
; The fpext half to float result is returned twice, once negated and once
; unchanged.
; The SI checks contain no conversion instruction at all, only a v_mov and a
; v_xor with 0x80000000. The VI and GFX11 checks convert with v_cvt_f32_f16 and
; then v_xor the f32 result.
2940define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0 {
2941; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
2942; SI:       ; %bb.0:
2943; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2944; SI-NEXT:    v_mov_b32_e32 v1, v0
2945; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
2946; SI-NEXT:    s_setpc_b64 s[30:31]
2947;
2948; VI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
2949; VI:       ; %bb.0:
2950; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2951; VI-NEXT:    v_cvt_f32_f16_e32 v1, v0
2952; VI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
2953; VI-NEXT:    s_setpc_b64 s[30:31]
2954;
2955; GFX11-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32:
2956; GFX11:       ; %bb.0:
2957; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2958; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v0
2959; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
2960; GFX11-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
2961; GFX11-NEXT:    s_setpc_b64 s[30:31]
2962  %fpext = fpext half %a to float
2963  %fneg = fneg float %fpext
2964  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
2965  %insert.1 = insertvalue { float, float } %insert.0, float %fpext, 1
2966  ret { float, float } %insert.1
2967}
2968
2969; --------------------------------------------------------------------------------
2970; fp_round tests
2971; --------------------------------------------------------------------------------
2972
2973define half @v_fneg_fp_round_f64_to_f16(double %a) #0 {
2974; SI-LABEL: v_fneg_fp_round_f64_to_f16:
2975; SI:       ; %bb.0:
2976; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2977; SI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
2978; SI-NEXT:    v_and_b32_e32 v2, 0x1ff, v1
2979; SI-NEXT:    v_or_b32_e32 v0, v2, v0
2980; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
2981; SI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
2982; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
2983; SI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
2984; SI-NEXT:    v_bfe_u32 v3, v1, 20, 11
2985; SI-NEXT:    s_movk_i32 s4, 0x3f1
2986; SI-NEXT:    v_or_b32_e32 v0, v2, v0
2987; SI-NEXT:    v_sub_i32_e32 v4, vcc, s4, v3
2988; SI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
2989; SI-NEXT:    v_med3_i32 v4, v4, 0, 13
2990; SI-NEXT:    v_lshrrev_b32_e32 v5, v4, v2
2991; SI-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
2992; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v2
2993; SI-NEXT:    s_movk_i32 s4, 0xfc10
2994; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
2995; SI-NEXT:    v_add_i32_e32 v3, vcc, s4, v3
2996; SI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
2997; SI-NEXT:    v_or_b32_e32 v2, v5, v2
2998; SI-NEXT:    v_or_b32_e32 v4, v0, v4
2999; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
3000; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3001; SI-NEXT:    v_and_b32_e32 v4, 7, v2
3002; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
3003; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
3004; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3005; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3006; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3007; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
3008; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
3009; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3010; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3011; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3012; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3013; SI-NEXT:    s_movk_i32 s4, 0x40f
3014; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3015; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
3016; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3017; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3018; SI-NEXT:    v_and_b32_e32 v1, 0x8000, v1
3019; SI-NEXT:    v_or_b32_e32 v0, v1, v0
3020; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3021; SI-NEXT:    s_setpc_b64 s[30:31]
3022;
3023; VI-LABEL: v_fneg_fp_round_f64_to_f16:
3024; VI:       ; %bb.0:
3025; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3026; VI-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
3027; VI-NEXT:    v_and_b32_e32 v2, 0x1ff, v1
3028; VI-NEXT:    v_or_b32_e32 v0, v2, v0
3029; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3030; VI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3031; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3032; VI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
3033; VI-NEXT:    v_bfe_u32 v3, v1, 20, 11
3034; VI-NEXT:    s_movk_i32 s4, 0x3f1
3035; VI-NEXT:    v_or_b32_e32 v0, v2, v0
3036; VI-NEXT:    v_sub_u32_e32 v4, vcc, s4, v3
3037; VI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
3038; VI-NEXT:    v_med3_i32 v4, v4, 0, 13
3039; VI-NEXT:    v_lshrrev_b32_e32 v5, v4, v2
3040; VI-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
3041; VI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v2
3042; VI-NEXT:    s_movk_i32 s4, 0xfc10
3043; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
3044; VI-NEXT:    v_add_u32_e32 v3, vcc, s4, v3
3045; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
3046; VI-NEXT:    v_or_b32_e32 v2, v5, v2
3047; VI-NEXT:    v_or_b32_e32 v4, v0, v4
3048; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
3049; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3050; VI-NEXT:    v_and_b32_e32 v4, 7, v2
3051; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
3052; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
3053; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3054; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3055; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3056; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
3057; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
3058; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3059; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3060; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3061; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3062; VI-NEXT:    s_movk_i32 s4, 0x40f
3063; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3064; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
3065; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3066; VI-NEXT:    v_mov_b32_e32 v2, 0x8000
3067; VI-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3068; VI-NEXT:    v_or_b32_e32 v0, v1, v0
3069; VI-NEXT:    s_setpc_b64 s[30:31]
3070;
3071; GFX11-LABEL: v_fneg_fp_round_f64_to_f16:
3072; GFX11:       ; %bb.0:
3073; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3074; GFX11-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
3075; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_4)
3076; GFX11-NEXT:    v_and_or_b32 v0, 0x1ff, v1, v0
3077; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3078; GFX11-NEXT:    v_bfe_u32 v3, v1, 20, 11
3079; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3080; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
3081; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3082; GFX11-NEXT:    v_sub_nc_u32_e32 v4, 0x3f1, v3
3083; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
3084; GFX11-NEXT:    v_and_or_b32 v0, 0xffe, v2, v0
3085; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3086; GFX11-NEXT:    v_med3_i32 v2, v4, 0, 13
3087; GFX11-NEXT:    v_or_b32_e32 v4, 0x1000, v0
3088; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v0
3089; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3090; GFX11-NEXT:    v_lshrrev_b32_e32 v5, v2, v4
3091; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v2, v5
3092; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3093; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v4
3094; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3095; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
3096; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
3097; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
3098; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
3099; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
3100; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
3101; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
3102; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
3103; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 9, 0x7c00
3104; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
3105; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3106; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3107; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
3108; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v4
3109; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
3110; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3111; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
3112; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
3113; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
3114; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
3115; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3116; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
3117; GFX11-NEXT:    v_and_or_b32 v0, 0x8000, v1, v0
3118; GFX11-NEXT:    s_setpc_b64 s[30:31]
3119  %fpround = fptrunc double %a to half
3120  %fneg = fneg half %fpround
3121  ret half %fneg
3122}
3123
; fneg(fptrunc(fneg double %a)) to half. None of the check blocks below
; contains a sign-bit xor or a negated source operand: the expected output is
; the integer-instruction f64->f16 conversion sequence reading the incoming
; v0/v1 values unmodified. The SI checks additionally end with a
; v_cvt_f32_f16 of the result.
3124define half @v_fneg_fp_round_fneg_f64_to_f16(double %a) #0 {
3125; SI-LABEL: v_fneg_fp_round_fneg_f64_to_f16:
3126; SI:       ; %bb.0:
3127; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3128; SI-NEXT:    v_and_b32_e32 v2, 0x1ff, v1
3129; SI-NEXT:    v_or_b32_e32 v0, v2, v0
3130; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3131; SI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3132; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3133; SI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
3134; SI-NEXT:    v_bfe_u32 v3, v1, 20, 11
3135; SI-NEXT:    v_or_b32_e32 v0, v2, v0
3136; SI-NEXT:    v_sub_i32_e32 v4, vcc, 0x3f1, v3
3137; SI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
3138; SI-NEXT:    v_med3_i32 v4, v4, 0, 13
3139; SI-NEXT:    v_lshrrev_b32_e32 v5, v4, v2
3140; SI-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
3141; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v2
3142; SI-NEXT:    s_movk_i32 s4, 0xfc10
3143; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
3144; SI-NEXT:    v_add_i32_e32 v3, vcc, s4, v3
3145; SI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
3146; SI-NEXT:    v_or_b32_e32 v2, v5, v2
3147; SI-NEXT:    v_or_b32_e32 v4, v0, v4
3148; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
3149; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3150; SI-NEXT:    v_and_b32_e32 v4, 7, v2
3151; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
3152; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
3153; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3154; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3155; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3156; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
3157; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
3158; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3159; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3160; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3161; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3162; SI-NEXT:    s_movk_i32 s4, 0x40f
3163; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3164; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
3165; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3166; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3167; SI-NEXT:    v_and_b32_e32 v1, 0x8000, v1
3168; SI-NEXT:    v_or_b32_e32 v0, v1, v0
3169; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3170; SI-NEXT:    s_setpc_b64 s[30:31]
3171;
3172; VI-LABEL: v_fneg_fp_round_fneg_f64_to_f16:
3173; VI:       ; %bb.0:
3174; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3175; VI-NEXT:    v_and_b32_e32 v2, 0x1ff, v1
3176; VI-NEXT:    v_or_b32_e32 v0, v2, v0
3177; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3178; VI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3179; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3180; VI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
3181; VI-NEXT:    v_bfe_u32 v3, v1, 20, 11
3182; VI-NEXT:    v_or_b32_e32 v0, v2, v0
3183; VI-NEXT:    v_sub_u32_e32 v4, vcc, 0x3f1, v3
3184; VI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
3185; VI-NEXT:    v_med3_i32 v4, v4, 0, 13
3186; VI-NEXT:    v_lshrrev_b32_e32 v5, v4, v2
3187; VI-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
3188; VI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v2
3189; VI-NEXT:    s_movk_i32 s4, 0xfc10
3190; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
3191; VI-NEXT:    v_add_u32_e32 v3, vcc, s4, v3
3192; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
3193; VI-NEXT:    v_or_b32_e32 v2, v5, v2
3194; VI-NEXT:    v_or_b32_e32 v4, v0, v4
3195; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
3196; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3197; VI-NEXT:    v_and_b32_e32 v4, 7, v2
3198; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
3199; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
3200; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3201; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3202; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3203; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
3204; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
3205; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3206; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3207; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3208; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3209; VI-NEXT:    s_movk_i32 s4, 0x40f
3210; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3211; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
3212; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3213; VI-NEXT:    v_mov_b32_e32 v2, 0x8000
3214; VI-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3215; VI-NEXT:    v_or_b32_e32 v0, v1, v0
3216; VI-NEXT:    s_setpc_b64 s[30:31]
3217;
3218; GFX11-LABEL: v_fneg_fp_round_fneg_f64_to_f16:
3219; GFX11:       ; %bb.0:
3220; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3221; GFX11-NEXT:    v_and_or_b32 v0, 0x1ff, v1, v0
3222; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3223; GFX11-NEXT:    v_bfe_u32 v3, v1, 20, 11
3224; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3225; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
3226; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
3227; GFX11-NEXT:    v_sub_nc_u32_e32 v4, 0x3f1, v3
3228; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
3229; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
3230; GFX11-NEXT:    v_and_or_b32 v0, 0xffe, v2, v0
3231; GFX11-NEXT:    v_med3_i32 v2, v4, 0, 13
3232; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3233; GFX11-NEXT:    v_or_b32_e32 v4, 0x1000, v0
3234; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v0
3235; GFX11-NEXT:    v_lshrrev_b32_e32 v5, v2, v4
3236; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3237; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v2, v5
3238; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v4
3239; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3240; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3241; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
3242; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
3243; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
3244; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
3245; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
3246; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3247; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
3248; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 9, 0x7c00
3249; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3250; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
3251; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3252; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
3253; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v4
3254; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
3255; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
3256; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
3257; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
3258; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3259; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
3260; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
3261; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
3262; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3263; GFX11-NEXT:    v_and_or_b32 v0, 0x8000, v1, v0
3264; GFX11-NEXT:    s_setpc_b64 s[30:31]
3265  %fneg.a = fneg double %a
3266  %fpround = fptrunc double %fneg.a to half
3267  %fneg = fneg half %fpround
3268  ret half %fneg
3269}
3270
; fneg(fptrunc(fneg double %a)) to half, where the negated double %fneg.a is
; also returned as the second struct member. Each check block expects a single
; v_xor_b32 with 0x80000000 that reads v1 and writes v2 for that returned
; value; the f64->f16 conversion sequence itself reads v1 without a sign flip.
3271define { half, double } @v_fneg_fp_round_store_use_fneg_f64_to_f16(double %a) #0 {
3272; SI-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
3273; SI:       ; %bb.0:
3274; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3275; SI-NEXT:    v_mov_b32_e32 v3, v0
3276; SI-NEXT:    v_and_b32_e32 v0, 0x1ff, v1
3277; SI-NEXT:    v_or_b32_e32 v0, v0, v3
3278; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3279; SI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3280; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3281; SI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
3282; SI-NEXT:    v_bfe_u32 v4, v1, 20, 11
3283; SI-NEXT:    s_movk_i32 s4, 0x3f1
3284; SI-NEXT:    v_or_b32_e32 v0, v2, v0
3285; SI-NEXT:    v_sub_i32_e32 v5, vcc, s4, v4
3286; SI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
3287; SI-NEXT:    v_med3_i32 v5, v5, 0, 13
3288; SI-NEXT:    v_lshrrev_b32_e32 v6, v5, v2
3289; SI-NEXT:    v_lshlrev_b32_e32 v5, v5, v6
3290; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v5, v2
3291; SI-NEXT:    s_movk_i32 s4, 0xfc10
3292; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
3293; SI-NEXT:    v_add_i32_e32 v4, vcc, s4, v4
3294; SI-NEXT:    v_lshlrev_b32_e32 v5, 12, v4
3295; SI-NEXT:    v_or_b32_e32 v2, v6, v2
3296; SI-NEXT:    v_or_b32_e32 v5, v0, v5
3297; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v4
3298; SI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
3299; SI-NEXT:    v_and_b32_e32 v5, 7, v2
3300; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v5
3301; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v5
3302; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3303; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3304; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3305; SI-NEXT:    v_mov_b32_e32 v5, 0x7c00
3306; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v4
3307; SI-NEXT:    v_cndmask_b32_e32 v2, v5, v2, vcc
3308; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3309; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3310; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3311; SI-NEXT:    s_movk_i32 s4, 0x40f
3312; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3313; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v4
3314; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3315; SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
3316; SI-NEXT:    v_and_b32_e32 v2, 0x8000, v2
3317; SI-NEXT:    v_or_b32_e32 v0, v2, v0
3318; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3319; SI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
3320; SI-NEXT:    v_mov_b32_e32 v1, v3
3321; SI-NEXT:    s_setpc_b64 s[30:31]
3322;
3323; VI-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
3324; VI:       ; %bb.0:
3325; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3326; VI-NEXT:    v_mov_b32_e32 v3, v0
3327; VI-NEXT:    v_and_b32_e32 v0, 0x1ff, v1
3328; VI-NEXT:    v_or_b32_e32 v0, v0, v3
3329; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3330; VI-NEXT:    v_lshrrev_b32_e32 v4, 8, v1
3331; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3332; VI-NEXT:    v_and_b32_e32 v4, 0xffe, v4
3333; VI-NEXT:    v_bfe_u32 v5, v1, 20, 11
3334; VI-NEXT:    s_movk_i32 s4, 0x3f1
3335; VI-NEXT:    v_or_b32_e32 v0, v4, v0
3336; VI-NEXT:    v_sub_u32_e32 v6, vcc, s4, v5
3337; VI-NEXT:    v_or_b32_e32 v4, 0x1000, v0
3338; VI-NEXT:    v_med3_i32 v6, v6, 0, 13
3339; VI-NEXT:    v_lshrrev_b32_e32 v7, v6, v4
3340; VI-NEXT:    v_lshlrev_b32_e32 v6, v6, v7
3341; VI-NEXT:    v_cmp_ne_u32_e32 vcc, v6, v4
3342; VI-NEXT:    s_movk_i32 s4, 0xfc10
3343; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
3344; VI-NEXT:    v_add_u32_e32 v5, vcc, s4, v5
3345; VI-NEXT:    v_lshlrev_b32_e32 v6, 12, v5
3346; VI-NEXT:    v_or_b32_e32 v4, v7, v4
3347; VI-NEXT:    v_or_b32_e32 v6, v0, v6
3348; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v5
3349; VI-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
3350; VI-NEXT:    v_and_b32_e32 v6, 7, v4
3351; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v6
3352; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v6
3353; VI-NEXT:    v_lshrrev_b32_e32 v4, 2, v4
3354; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3355; VI-NEXT:    v_addc_u32_e32 v4, vcc, 0, v4, vcc
3356; VI-NEXT:    v_mov_b32_e32 v6, 0x7c00
3357; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v5
3358; VI-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
3359; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3360; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3361; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3362; VI-NEXT:    s_movk_i32 s4, 0x40f
3363; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3364; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v5
3365; VI-NEXT:    v_cndmask_b32_e32 v0, v4, v0, vcc
3366; VI-NEXT:    v_mov_b32_e32 v4, 0x8000
3367; VI-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
3368; VI-NEXT:    v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3369; VI-NEXT:    v_or_b32_e32 v0, v1, v0
3370; VI-NEXT:    v_mov_b32_e32 v1, v3
3371; VI-NEXT:    s_setpc_b64 s[30:31]
3372;
3373; GFX11-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f16:
3374; GFX11:       ; %bb.0:
3375; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3376; GFX11-NEXT:    v_and_or_b32 v2, 0x1ff, v1, v0
3377; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 8, v1
3378; GFX11-NEXT:    v_bfe_u32 v4, v1, 20, 11
3379; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3380; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v2
3381; GFX11-NEXT:    v_sub_nc_u32_e32 v5, 0x3f1, v4
3382; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3383; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
3384; GFX11-NEXT:    v_and_or_b32 v2, 0xffe, v3, v2
3385; GFX11-NEXT:    v_med3_i32 v3, v5, 0, 13
3386; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3387; GFX11-NEXT:    v_or_b32_e32 v5, 0x1000, v2
3388; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v2
3389; GFX11-NEXT:    v_lshrrev_b32_e32 v6, v3, v5
3390; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3391; GFX11-NEXT:    v_lshlrev_b32_e32 v3, v3, v6
3392; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v3, v5
3393; GFX11-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc_lo
3394; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3395; GFX11-NEXT:    v_or_b32_e32 v3, v6, v3
3396; GFX11-NEXT:    v_add_nc_u32_e32 v4, 0xfffffc10, v4
3397; GFX11-NEXT:    v_lshl_or_b32 v5, v4, 12, v2
3398; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v4
3399; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s1
3400; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3401; GFX11-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc_lo
3402; GFX11-NEXT:    v_lshl_or_b32 v2, v2, 9, 0x7c00
3403; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3404; GFX11-NEXT:    v_and_b32_e32 v5, 7, v3
3405; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 2, v3
3406; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v5
3407; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v5
3408; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
3409; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_2)
3410; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
3411; GFX11-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
3412; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v4
3413; GFX11-NEXT:    v_cndmask_b32_e32 v3, 0x7c00, v3, vcc_lo
3414; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v4
3415; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3416; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc_lo
3417; GFX11-NEXT:    v_and_or_b32 v3, 0x8000, v5, v2
3418; GFX11-NEXT:    v_xor_b32_e32 v2, 0x80000000, v1
3419; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
3420; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3
3421; GFX11-NEXT:    s_setpc_b64 s[30:31]
3422  %fneg.a = fneg double %a
3423  %fpround = fptrunc double %fneg.a to half
3424  %fneg = fneg half %fpround
3425  %insert.0 = insertvalue { half, double } poison, half %fneg, 0
3426  %insert.1 = insertvalue { half, double } %insert.0, double %fneg.a, 1
3427  ret { half, double } %insert.1
3428}
3429
; fneg(fptrunc(fneg double %a)) to half, where %fneg.a additionally feeds an
; fmul with %c whose product is returned as the second struct member. In each
; check block the negation shows up only as the -v[0:1] source operand of
; v_mul_f64; the f64->f16 conversion sequence contains no sign-bit xor.
3430define { half, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f16(double %a, double %c) #0 {
3431; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f16:
3432; SI:       ; %bb.0:
3433; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3434; SI-NEXT:    v_and_b32_e32 v4, 0x1ff, v1
3435; SI-NEXT:    v_or_b32_e32 v4, v4, v0
3436; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
3437; SI-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
3438; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
3439; SI-NEXT:    v_and_b32_e32 v5, 0xffe, v5
3440; SI-NEXT:    v_bfe_u32 v6, v1, 20, 11
3441; SI-NEXT:    s_movk_i32 s4, 0x3f1
3442; SI-NEXT:    v_or_b32_e32 v4, v5, v4
3443; SI-NEXT:    v_sub_i32_e32 v7, vcc, s4, v6
3444; SI-NEXT:    v_or_b32_e32 v5, 0x1000, v4
3445; SI-NEXT:    v_med3_i32 v7, v7, 0, 13
3446; SI-NEXT:    v_lshrrev_b32_e32 v8, v7, v5
3447; SI-NEXT:    v_lshlrev_b32_e32 v7, v7, v8
3448; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v7, v5
3449; SI-NEXT:    s_movk_i32 s4, 0xfc10
3450; SI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
3451; SI-NEXT:    v_add_i32_e32 v6, vcc, s4, v6
3452; SI-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
3453; SI-NEXT:    v_or_b32_e32 v5, v8, v5
3454; SI-NEXT:    v_or_b32_e32 v7, v4, v7
3455; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
3456; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
3457; SI-NEXT:    v_and_b32_e32 v7, 7, v5
3458; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
3459; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v7
3460; SI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
3461; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3462; SI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3463; SI-NEXT:    v_mov_b32_e32 v7, 0x7c00
3464; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
3465; SI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
3466; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
3467; SI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
3468; SI-NEXT:    v_lshlrev_b32_e32 v4, 9, v4
3469; SI-NEXT:    s_movk_i32 s4, 0x40f
3470; SI-NEXT:    v_or_b32_e32 v4, 0x7c00, v4
3471; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v6
3472; SI-NEXT:    v_cndmask_b32_e32 v4, v5, v4, vcc
3473; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
3474; SI-NEXT:    v_and_b32_e32 v5, 0x8000, v5
3475; SI-NEXT:    v_or_b32_e32 v4, v5, v4
3476; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
3477; SI-NEXT:    v_mul_f64 v[1:2], -v[0:1], v[2:3]
3478; SI-NEXT:    v_mov_b32_e32 v0, v4
3479; SI-NEXT:    s_setpc_b64 s[30:31]
3480;
3481; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f16:
3482; VI:       ; %bb.0:
3483; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3484; VI-NEXT:    v_and_b32_e32 v4, 0x1ff, v1
3485; VI-NEXT:    v_or_b32_e32 v4, v4, v0
3486; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
3487; VI-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
3488; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
3489; VI-NEXT:    v_and_b32_e32 v5, 0xffe, v5
3490; VI-NEXT:    v_bfe_u32 v6, v1, 20, 11
3491; VI-NEXT:    s_movk_i32 s4, 0x3f1
3492; VI-NEXT:    v_or_b32_e32 v4, v5, v4
3493; VI-NEXT:    v_sub_u32_e32 v7, vcc, s4, v6
3494; VI-NEXT:    v_or_b32_e32 v5, 0x1000, v4
3495; VI-NEXT:    v_med3_i32 v7, v7, 0, 13
3496; VI-NEXT:    v_lshrrev_b32_e32 v8, v7, v5
3497; VI-NEXT:    v_lshlrev_b32_e32 v7, v7, v8
3498; VI-NEXT:    v_cmp_ne_u32_e32 vcc, v7, v5
3499; VI-NEXT:    s_movk_i32 s4, 0xfc10
3500; VI-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
3501; VI-NEXT:    v_add_u32_e32 v6, vcc, s4, v6
3502; VI-NEXT:    v_lshlrev_b32_e32 v7, 12, v6
3503; VI-NEXT:    v_or_b32_e32 v5, v8, v5
3504; VI-NEXT:    v_or_b32_e32 v7, v4, v7
3505; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v6
3506; VI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
3507; VI-NEXT:    v_and_b32_e32 v7, 7, v5
3508; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v7
3509; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v7
3510; VI-NEXT:    v_lshrrev_b32_e32 v5, 2, v5
3511; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3512; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
3513; VI-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
3514; VI-NEXT:    v_mov_b32_e32 v7, 0x7c00
3515; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v6
3516; VI-NEXT:    v_cndmask_b32_e32 v5, v7, v5, vcc
3517; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
3518; VI-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
3519; VI-NEXT:    v_lshlrev_b32_e32 v4, 9, v4
3520; VI-NEXT:    s_movk_i32 s4, 0x40f
3521; VI-NEXT:    v_or_b32_e32 v4, 0x7c00, v4
3522; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v6
3523; VI-NEXT:    v_cndmask_b32_e32 v0, v5, v4, vcc
3524; VI-NEXT:    v_mov_b32_e32 v4, 0x8000
3525; VI-NEXT:    v_and_b32_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3526; VI-NEXT:    v_or_b32_e32 v0, v1, v0
3527; VI-NEXT:    v_mov_b32_e32 v1, v2
3528; VI-NEXT:    v_mov_b32_e32 v2, v3
3529; VI-NEXT:    s_setpc_b64 s[30:31]
3530;
3531; GFX11-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f16:
3532; GFX11:       ; %bb.0:
3533; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3534; GFX11-NEXT:    v_and_or_b32 v4, 0x1ff, v1, v0
3535; GFX11-NEXT:    v_lshrrev_b32_e32 v5, 8, v1
3536; GFX11-NEXT:    v_bfe_u32 v6, v1, 20, 11
3537; GFX11-NEXT:    v_mul_f64 v[2:3], -v[0:1], v[2:3]
3538; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3539; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
3540; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1)
3541; GFX11-NEXT:    v_sub_nc_u32_e32 v7, 0x3f1, v6
3542; GFX11-NEXT:    v_add_nc_u32_e32 v6, 0xfffffc10, v6
3543; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc_lo
3544; GFX11-NEXT:    v_and_or_b32 v4, 0xffe, v5, v4
3545; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
3546; GFX11-NEXT:    v_med3_i32 v5, v7, 0, 13
3547; GFX11-NEXT:    v_or_b32_e32 v7, 0x1000, v4
3548; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v4
3549; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
3550; GFX11-NEXT:    v_lshrrev_b32_e32 v8, v5, v7
3551; GFX11-NEXT:    v_lshlrev_b32_e32 v5, v5, v8
3552; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3)
3553; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v5, v7
3554; GFX11-NEXT:    v_lshl_or_b32 v7, v6, 12, v4
3555; GFX11-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s1
3556; GFX11-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc_lo
3557; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v6
3558; GFX11-NEXT:    v_lshl_or_b32 v4, v4, 9, 0x7c00
3559; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
3560; GFX11-NEXT:    v_or_b32_e32 v5, v8, v5
3561; GFX11-NEXT:    v_cndmask_b32_e32 v0, v7, v5, vcc_lo
3562; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3563; GFX11-NEXT:    v_and_b32_e32 v5, 7, v0
3564; GFX11-NEXT:    v_lshrrev_b32_e32 v0, 2, v0
3565; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v5
3566; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v5
3567; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
3568; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
3569; GFX11-NEXT:    v_add_co_ci_u32_e32 v0, vcc_lo, 0, v0, vcc_lo
3570; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v6
3571; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3572; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x7c00, v0, vcc_lo
3573; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v6
3574; GFX11-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
3575; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3576; GFX11-NEXT:    v_and_or_b32 v0, 0x8000, v1, v0
3577; GFX11-NEXT:    v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
3578; GFX11-NEXT:    s_setpc_b64 s[30:31]
3579  %fneg.a = fneg double %a
3580  %fpround = fptrunc double %fneg.a to half
3581  %fneg = fneg half %fpround
3582  %use1 = fmul double %fneg.a, %c
3583  %insert.0 = insertvalue { half, double } poison, half %fneg, 0
3584  %insert.1 = insertvalue { half, double } %insert.0, double %use1, 1
3585  ret { half, double } %insert.1
3586}
3587
3588define { half, half } @v_fneg_multi_use_fp_round_fneg_f64_to_f16(double %a) #0 {
3589; SI-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
3590; SI:       ; %bb.0:
3591; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3592; SI-NEXT:    v_and_b32_e32 v2, 0x1ff, v1
3593; SI-NEXT:    v_or_b32_e32 v0, v2, v0
3594; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3595; SI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3596; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3597; SI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
3598; SI-NEXT:    v_bfe_u32 v3, v1, 20, 11
3599; SI-NEXT:    v_or_b32_e32 v0, v2, v0
3600; SI-NEXT:    v_sub_i32_e32 v4, vcc, 0x3f1, v3
3601; SI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
3602; SI-NEXT:    v_med3_i32 v4, v4, 0, 13
3603; SI-NEXT:    v_lshrrev_b32_e32 v5, v4, v2
3604; SI-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
3605; SI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v2
3606; SI-NEXT:    s_movk_i32 s4, 0xfc10
3607; SI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
3608; SI-NEXT:    v_add_i32_e32 v3, vcc, s4, v3
3609; SI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
3610; SI-NEXT:    v_or_b32_e32 v2, v5, v2
3611; SI-NEXT:    v_or_b32_e32 v4, v0, v4
3612; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
3613; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3614; SI-NEXT:    v_and_b32_e32 v4, 7, v2
3615; SI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
3616; SI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
3617; SI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3618; SI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3619; SI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3620; SI-NEXT:    v_mov_b32_e32 v4, 0x7c00
3621; SI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
3622; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3623; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3624; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3625; SI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3626; SI-NEXT:    s_movk_i32 s4, 0x40f
3627; SI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3628; SI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
3629; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3630; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3631; SI-NEXT:    v_and_b32_e32 v1, 0x8000, v1
3632; SI-NEXT:    v_or_b32_e32 v1, v1, v0
3633; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v1
3634; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
3635; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
3636; SI-NEXT:    s_setpc_b64 s[30:31]
3637;
3638; VI-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
3639; VI:       ; %bb.0:
3640; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3641; VI-NEXT:    v_and_b32_e32 v2, 0x1ff, v1
3642; VI-NEXT:    v_or_b32_e32 v0, v2, v0
3643; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3644; VI-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3645; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3646; VI-NEXT:    v_and_b32_e32 v2, 0xffe, v2
3647; VI-NEXT:    v_bfe_u32 v3, v1, 20, 11
3648; VI-NEXT:    v_or_b32_e32 v0, v2, v0
3649; VI-NEXT:    v_sub_u32_e32 v4, vcc, 0x3f1, v3
3650; VI-NEXT:    v_or_b32_e32 v2, 0x1000, v0
3651; VI-NEXT:    v_med3_i32 v4, v4, 0, 13
3652; VI-NEXT:    v_lshrrev_b32_e32 v5, v4, v2
3653; VI-NEXT:    v_lshlrev_b32_e32 v4, v4, v5
3654; VI-NEXT:    v_cmp_ne_u32_e32 vcc, v4, v2
3655; VI-NEXT:    s_movk_i32 s4, 0xfc10
3656; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
3657; VI-NEXT:    v_add_u32_e32 v3, vcc, s4, v3
3658; VI-NEXT:    v_lshlrev_b32_e32 v4, 12, v3
3659; VI-NEXT:    v_or_b32_e32 v2, v5, v2
3660; VI-NEXT:    v_or_b32_e32 v4, v0, v4
3661; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 1, v3
3662; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3663; VI-NEXT:    v_and_b32_e32 v4, 7, v2
3664; VI-NEXT:    v_cmp_lt_i32_e32 vcc, 5, v4
3665; VI-NEXT:    v_cmp_eq_u32_e64 s[4:5], 3, v4
3666; VI-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3667; VI-NEXT:    s_or_b64 vcc, s[4:5], vcc
3668; VI-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
3669; VI-NEXT:    v_mov_b32_e32 v4, 0x7c00
3670; VI-NEXT:    v_cmp_gt_i32_e32 vcc, 31, v3
3671; VI-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
3672; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
3673; VI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
3674; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
3675; VI-NEXT:    s_movk_i32 s4, 0x40f
3676; VI-NEXT:    v_or_b32_e32 v0, 0x7c00, v0
3677; VI-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v3
3678; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
3679; VI-NEXT:    v_mov_b32_e32 v2, 0x8000
3680; VI-NEXT:    v_and_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
3681; VI-NEXT:    v_or_b32_e32 v1, v1, v0
3682; VI-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
3683; VI-NEXT:    s_setpc_b64 s[30:31]
3684;
3685; GFX11-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f16:
3686; GFX11:       ; %bb.0:
3687; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3688; GFX11-NEXT:    v_and_or_b32 v0, 0x1ff, v1, v0
3689; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 8, v1
3690; GFX11-NEXT:    v_bfe_u32 v3, v1, 20, 11
3691; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
3692; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3)
3693; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
3694; GFX11-NEXT:    v_sub_nc_u32_e32 v4, 0x3f1, v3
3695; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
3696; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
3697; GFX11-NEXT:    v_and_or_b32 v0, 0xffe, v2, v0
3698; GFX11-NEXT:    v_med3_i32 v2, v4, 0, 13
3699; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3700; GFX11-NEXT:    v_or_b32_e32 v4, 0x1000, v0
3701; GFX11-NEXT:    v_cmp_ne_u32_e64 s1, 0, v0
3702; GFX11-NEXT:    v_lshrrev_b32_e32 v5, v2, v4
3703; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3704; GFX11-NEXT:    v_lshlrev_b32_e32 v2, v2, v5
3705; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, v2, v4
3706; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc_lo
3707; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
3708; GFX11-NEXT:    v_or_b32_e32 v2, v5, v2
3709; GFX11-NEXT:    v_add_nc_u32_e32 v3, 0xfffffc10, v3
3710; GFX11-NEXT:    v_lshl_or_b32 v4, v3, 12, v0
3711; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 1, v3
3712; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
3713; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
3714; GFX11-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc_lo
3715; GFX11-NEXT:    v_lshl_or_b32 v0, v0, 9, 0x7c00
3716; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3717; GFX11-NEXT:    v_and_b32_e32 v4, 7, v2
3718; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 2, v2
3719; GFX11-NEXT:    v_cmp_lt_i32_e32 vcc_lo, 5, v4
3720; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 3, v4
3721; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
3722; GFX11-NEXT:    s_or_b32 vcc_lo, s0, vcc_lo
3723; GFX11-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, 0, v2, vcc_lo
3724; GFX11-NEXT:    v_cmp_gt_i32_e32 vcc_lo, 31, v3
3725; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
3726; GFX11-NEXT:    v_cndmask_b32_e32 v2, 0x7c00, v2, vcc_lo
3727; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0x40f, v3
3728; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc_lo
3729; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3730; GFX11-NEXT:    v_and_or_b32 v1, 0x8000, v1, v0
3731; GFX11-NEXT:    v_xor_b32_e32 v0, 0x8000, v1
3732; GFX11-NEXT:    s_setpc_b64 s[30:31]
3733  %fpround = fptrunc double %a to half
3734  %fneg = fneg half %fpround
3735  %insert.0 = insertvalue { half, half } poison, half %fneg, 0
3736  %insert.1 = insertvalue { half, half } %insert.0, half %fpround, 1
3737  ret { half, half } %insert.1
3738}
3739
3740; --------------------------------------------------------------------------------
3741; ftrunc tests
3742; --------------------------------------------------------------------------------
3743
; fneg(trunc(x)). The checks below show the negation absorbed as a source
; modifier (-v0): on v_trunc_f16 for VI/GFX11, and on the f32->f16 conversion
; that feeds v_trunc_f32 for SI. No separate sign-flip instruction is expected.
3744define half @v_fneg_trunc_f16(half %a) #0 {
3745; SI-LABEL: v_fneg_trunc_f16:
3746; SI:       ; %bb.0:
3747; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3748; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
3749; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3750; SI-NEXT:    v_trunc_f32_e32 v0, v0
3751; SI-NEXT:    s_setpc_b64 s[30:31]
3752;
3753; VI-LABEL: v_fneg_trunc_f16:
3754; VI:       ; %bb.0:
3755; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3756; VI-NEXT:    v_trunc_f16_e64 v0, -v0
3757; VI-NEXT:    s_setpc_b64 s[30:31]
3758;
3759; GFX11-LABEL: v_fneg_trunc_f16:
3760; GFX11:       ; %bb.0:
3761; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3762; GFX11-NEXT:    v_trunc_f16_e64 v0, -v0
3763; GFX11-NEXT:    s_setpc_b64 s[30:31]
3764  %trunc = call half @llvm.trunc.f16(half %a)
3765  %fneg = fneg half %trunc
3766  ret half %fneg
3767}
3768
3769; --------------------------------------------------------------------------------
3770; fround tests
3771; --------------------------------------------------------------------------------
3772
; fneg(round(x)). The checks show llvm.round expanded inline (trunc, a compare
; of |x - trunc(x)| against 0.5, a select of 0 or 1.0, a v_bfi_b32 and a final
; add). In the SAFE runs the sign of the sum is flipped afterwards with
; v_xor_b32; in the NSZ runs (llc -enable-no-signed-zeros-fp-math, see the RUN
; lines) the negation is folded into the final operation as "v_sub ... -v1, v0".
3773define half @v_fneg_round_f16(half %a) #0 {
3774; SI-SAFE-LABEL: v_fneg_round_f16:
3775; SI-SAFE:       ; %bb.0:
3776; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3777; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v0
3778; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
3779; SI-SAFE-NEXT:    v_trunc_f32_e32 v1, v0
3780; SI-SAFE-NEXT:    v_sub_f32_e32 v2, v0, v1
3781; SI-SAFE-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
3782; SI-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
3783; SI-SAFE-NEXT:    s_brev_b32 s4, -2
3784; SI-SAFE-NEXT:    v_bfi_b32 v0, s4, v2, v0
3785; SI-SAFE-NEXT:    v_add_f32_e32 v0, v1, v0
3786; SI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
3787; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
3788;
3789; SI-NSZ-LABEL: v_fneg_round_f16:
3790; SI-NSZ:       ; %bb.0:
3791; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3792; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
3793; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
3794; SI-NSZ-NEXT:    v_trunc_f32_e32 v1, v0
3795; SI-NSZ-NEXT:    v_sub_f32_e32 v2, v0, v1
3796; SI-NSZ-NEXT:    v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
3797; SI-NSZ-NEXT:    v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
3798; SI-NSZ-NEXT:    s_brev_b32 s4, -2
3799; SI-NSZ-NEXT:    v_bfi_b32 v0, s4, v2, v0
3800; SI-NSZ-NEXT:    v_sub_f32_e64 v0, -v1, v0
3801; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
3802;
3803; VI-SAFE-LABEL: v_fneg_round_f16:
3804; VI-SAFE:       ; %bb.0:
3805; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3806; VI-SAFE-NEXT:    v_trunc_f16_e32 v1, v0
3807; VI-SAFE-NEXT:    v_sub_f16_e32 v2, v0, v1
3808; VI-SAFE-NEXT:    v_mov_b32_e32 v3, 0x3c00
3809; VI-SAFE-NEXT:    v_cmp_ge_f16_e64 vcc, |v2|, 0.5
3810; VI-SAFE-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
3811; VI-SAFE-NEXT:    s_movk_i32 s4, 0x7fff
3812; VI-SAFE-NEXT:    v_bfi_b32 v0, s4, v2, v0
3813; VI-SAFE-NEXT:    v_add_f16_e32 v0, v1, v0
3814; VI-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
3815; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
3816;
3817; VI-NSZ-LABEL: v_fneg_round_f16:
3818; VI-NSZ:       ; %bb.0:
3819; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3820; VI-NSZ-NEXT:    v_trunc_f16_e32 v1, v0
3821; VI-NSZ-NEXT:    v_sub_f16_e32 v2, v0, v1
3822; VI-NSZ-NEXT:    v_mov_b32_e32 v3, 0x3c00
3823; VI-NSZ-NEXT:    v_cmp_ge_f16_e64 vcc, |v2|, 0.5
3824; VI-NSZ-NEXT:    v_cndmask_b32_e32 v2, 0, v3, vcc
3825; VI-NSZ-NEXT:    s_movk_i32 s4, 0x7fff
3826; VI-NSZ-NEXT:    v_bfi_b32 v0, s4, v2, v0
3827; VI-NSZ-NEXT:    v_sub_f16_e64 v0, -v1, v0
3828; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
3829;
3830; GFX11-SAFE-LABEL: v_fneg_round_f16:
3831; GFX11-SAFE:       ; %bb.0:
3832; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3833; GFX11-SAFE-NEXT:    v_trunc_f16_e32 v1, v0
3834; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3835; GFX11-SAFE-NEXT:    v_sub_f16_e32 v2, v0, v1
3836; GFX11-SAFE-NEXT:    v_cmp_ge_f16_e64 s0, |v2|, 0.5
3837; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3838; GFX11-SAFE-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s0
3839; GFX11-SAFE-NEXT:    v_bfi_b32 v0, 0x7fff, v2, v0
3840; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3841; GFX11-SAFE-NEXT:    v_add_f16_e32 v0, v1, v0
3842; GFX11-SAFE-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
3843; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
3844;
3845; GFX11-NSZ-LABEL: v_fneg_round_f16:
3846; GFX11-NSZ:       ; %bb.0:
3847; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3848; GFX11-NSZ-NEXT:    v_trunc_f16_e32 v1, v0
3849; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3850; GFX11-NSZ-NEXT:    v_sub_f16_e32 v2, v0, v1
3851; GFX11-NSZ-NEXT:    v_cmp_ge_f16_e64 s0, |v2|, 0.5
3852; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
3853; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v2, 0, 0x3c00, s0
3854; GFX11-NSZ-NEXT:    v_bfi_b32 v0, 0x7fff, v2, v0
3855; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3856; GFX11-NSZ-NEXT:    v_sub_f16_e64 v0, -v1, v0
3857; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
3858  %round = call half @llvm.round.f16(half %a)
3859  %fneg = fneg half %round
3860  ret half %fneg
3861}
3862
3863; --------------------------------------------------------------------------------
3864; rint tests
3865; --------------------------------------------------------------------------------
3866
; fneg(rint(x)). The checks show llvm.rint selected as v_rndne with the
; negation folded into an input modifier (-v0): directly on v_rndne_f16 for
; VI/GFX11, and on the f32->f16 conversion ahead of v_rndne_f32 for SI.
3867define half @v_fneg_rint_f16(half %a) #0 {
3868; SI-LABEL: v_fneg_rint_f16:
3869; SI:       ; %bb.0:
3870; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3871; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
3872; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3873; SI-NEXT:    v_rndne_f32_e32 v0, v0
3874; SI-NEXT:    s_setpc_b64 s[30:31]
3875;
3876; VI-LABEL: v_fneg_rint_f16:
3877; VI:       ; %bb.0:
3878; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3879; VI-NEXT:    v_rndne_f16_e64 v0, -v0
3880; VI-NEXT:    s_setpc_b64 s[30:31]
3881;
3882; GFX11-LABEL: v_fneg_rint_f16:
3883; GFX11:       ; %bb.0:
3884; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3885; GFX11-NEXT:    v_rndne_f16_e64 v0, -v0
3886; GFX11-NEXT:    s_setpc_b64 s[30:31]
3887  %rint = call half @llvm.rint.f16(half %a)
3888  %fneg = fneg half %rint
3889  ret half %fneg
3890}
3891
3892; --------------------------------------------------------------------------------
3893; nearbyint tests
3894; --------------------------------------------------------------------------------
3895
; fneg(nearbyint(x)). The expected code is the same as for the rint test in
; this file: v_rndne with the negation folded into an input modifier (-v0),
; applied on the f32->f16 conversion for SI.
3896define half @v_fneg_nearbyint_f16(half %a) #0 {
3897; SI-LABEL: v_fneg_nearbyint_f16:
3898; SI:       ; %bb.0:
3899; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3900; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
3901; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3902; SI-NEXT:    v_rndne_f32_e32 v0, v0
3903; SI-NEXT:    s_setpc_b64 s[30:31]
3904;
3905; VI-LABEL: v_fneg_nearbyint_f16:
3906; VI:       ; %bb.0:
3907; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3908; VI-NEXT:    v_rndne_f16_e64 v0, -v0
3909; VI-NEXT:    s_setpc_b64 s[30:31]
3910;
3911; GFX11-LABEL: v_fneg_nearbyint_f16:
3912; GFX11:       ; %bb.0:
3913; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3914; GFX11-NEXT:    v_rndne_f16_e64 v0, -v0
3915; GFX11-NEXT:    s_setpc_b64 s[30:31]
3916  %nearbyint = call half @llvm.nearbyint.f16(half %a)
3917  %fneg = fneg half %nearbyint
3918  ret half %fneg
3919}
3920
3921; --------------------------------------------------------------------------------
3922; sin tests
3923; --------------------------------------------------------------------------------
3924
; fneg(sin(x)). The checks show the negation pushed through to the input of
; the sin expansion: on VI/GFX11 the input scale multiply uses the constant
; 0xb118, whose f16 sign bit is set, and on SI the negation is a -v0 modifier
; on the f32->f16 conversion while the scale constant 0x3e22f983 stays
; positive. NOTE(review): the scale constant is presumably 1/(2*pi); confirm
; against the sin lowering. VI and SI also expect a v_fract before v_sin,
; GFX11 does not.
3925define half @v_fneg_sin_f16(half %a) #0 {
3926; SI-LABEL: v_fneg_sin_f16:
3927; SI:       ; %bb.0:
3928; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3929; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
3930; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3931; SI-NEXT:    v_mul_f32_e32 v0, 0x3e22f983, v0
3932; SI-NEXT:    v_fract_f32_e32 v0, v0
3933; SI-NEXT:    v_sin_f32_e32 v0, v0
3934; SI-NEXT:    s_setpc_b64 s[30:31]
3935;
3936; VI-LABEL: v_fneg_sin_f16:
3937; VI:       ; %bb.0:
3938; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3939; VI-NEXT:    v_mul_f16_e32 v0, 0xb118, v0
3940; VI-NEXT:    v_fract_f16_e32 v0, v0
3941; VI-NEXT:    v_sin_f16_e32 v0, v0
3942; VI-NEXT:    s_setpc_b64 s[30:31]
3943;
3944; GFX11-LABEL: v_fneg_sin_f16:
3945; GFX11:       ; %bb.0:
3946; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3947; GFX11-NEXT:    v_mul_f16_e32 v0, 0xb118, v0
3948; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
3949; GFX11-NEXT:    v_sin_f16_e32 v0, v0
3950; GFX11-NEXT:    s_setpc_b64 s[30:31]
3951  %sin = call half @llvm.sin.f16(half %a)
3952  %fneg = fneg half %sin
3953  ret half %fneg
3954}
3955
3956; --------------------------------------------------------------------------------
3957; fcanonicalize tests
3958; --------------------------------------------------------------------------------
3959
; fneg(canonicalize(x)). The checks show the negation folded into the source
; modifiers of the canonicalizing instruction: "v_max_f16_e64 v0, -v0, -v0" on
; VI/GFX11, and a -v0 modifier on the f32->f16 conversion for SI.
3960define half @v_fneg_canonicalize_f16(half %a) #0 {
3961; SI-LABEL: v_fneg_canonicalize_f16:
3962; SI:       ; %bb.0:
3963; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3964; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
3965; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
3966; SI-NEXT:    s_setpc_b64 s[30:31]
3967;
3968; VI-LABEL: v_fneg_canonicalize_f16:
3969; VI:       ; %bb.0:
3970; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3971; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
3972; VI-NEXT:    s_setpc_b64 s[30:31]
3973;
3974; GFX11-LABEL: v_fneg_canonicalize_f16:
3975; GFX11:       ; %bb.0:
3976; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3977; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
3978; GFX11-NEXT:    s_setpc_b64 s[30:31]
3979  %canonicalize = call half @llvm.canonicalize.f16(half %a)
3980  %fneg = fneg half %canonicalize
3981  ret half %fneg
3982}
3983
3984; --------------------------------------------------------------------------------
3985; CopyToReg tests
3986; --------------------------------------------------------------------------------
3987
; %fneg is computed in the entry block but only used in %if, while the
; un-negated %mul is stored in %endif, so the negated value has to cross a
; basic-block boundary. The checks show the multiply emitted once in the entry
; block without a modifier and the negation folded into the user inside %if
; ("v_mul_f16_e64 v3, -v2, v4" on VI/GFX11; a -v2 modifier on the f32->f16
; conversion on SI).
3988define void @v_fneg_copytoreg_f16(ptr addrspace(1) %out, half %a, half %b, half %c, i32 %d) #0 {
3989; SI-LABEL: v_fneg_copytoreg_f16:
3990; SI:       ; %bb.0:
3991; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3992; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
3993; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
3994; SI-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
3995; SI-NEXT:    v_lshlrev_b32_e32 v6, 1, v6
3996; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
3997; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
3998; SI-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
3999; SI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4000; SI-NEXT:    v_mul_f32_e32 v2, v2, v3
4001; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
4002; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
4003; SI-NEXT:    s_cbranch_execz .LBB81_2
4004; SI-NEXT:  ; %bb.1: ; %if
4005; SI-NEXT:    v_cvt_f16_f32_e64 v3, -v2
4006; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
4007; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
4008; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
4009; SI-NEXT:    v_mul_f32_e32 v3, v3, v4
4010; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
4011; SI-NEXT:    flat_store_short v[0:1], v3
4012; SI-NEXT:    s_waitcnt vmcnt(0)
4013; SI-NEXT:  .LBB81_2: ; %endif
4014; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
4015; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
4016; SI-NEXT:    flat_store_short v[0:1], v2
4017; SI-NEXT:    s_waitcnt vmcnt(0)
4018; SI-NEXT:    s_setpc_b64 s[30:31]
4019;
4020; VI-LABEL: v_fneg_copytoreg_f16:
4021; VI:       ; %bb.0:
4022; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4023; VI-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
4024; VI-NEXT:    v_lshlrev_b32_e32 v6, 1, v6
4025; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v6
4026; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
4027; VI-NEXT:    v_mul_f16_e32 v2, v2, v3
4028; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v5
4029; VI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
4030; VI-NEXT:    s_cbranch_execz .LBB81_2
4031; VI-NEXT:  ; %bb.1: ; %if
4032; VI-NEXT:    v_mul_f16_e64 v3, -v2, v4
4033; VI-NEXT:    flat_store_short v[0:1], v3
4034; VI-NEXT:    s_waitcnt vmcnt(0)
4035; VI-NEXT:  .LBB81_2: ; %endif
4036; VI-NEXT:    s_or_b64 exec, exec, s[4:5]
4037; VI-NEXT:    flat_store_short v[0:1], v2
4038; VI-NEXT:    s_waitcnt vmcnt(0)
4039; VI-NEXT:    s_setpc_b64 s[30:31]
4040;
4041; GFX11-LABEL: v_fneg_copytoreg_f16:
4042; GFX11:       ; %bb.0:
4043; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4044; GFX11-NEXT:    v_and_b32_e32 v6, 0x3ff, v31
4045; GFX11-NEXT:    v_mul_f16_e32 v2, v2, v3
4046; GFX11-NEXT:    s_mov_b32 s0, exec_lo
4047; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
4048; GFX11-NEXT:    v_lshlrev_b32_e32 v6, 1, v6
4049; GFX11-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v6
4050; GFX11-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
4051; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v5
4052; GFX11-NEXT:    s_cbranch_execz .LBB81_2
4053; GFX11-NEXT:  ; %bb.1: ; %if
4054; GFX11-NEXT:    v_mul_f16_e64 v3, -v2, v4
4055; GFX11-NEXT:    global_store_b16 v[0:1], v3, off dlc
4056; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4057; GFX11-NEXT:  .LBB81_2: ; %endif
4058; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s0
4059; GFX11-NEXT:    global_store_b16 v[0:1], v2, off dlc
4060; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
4061; GFX11-NEXT:    s_setpc_b64 s[30:31]
4062  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4063  %tid.ext = sext i32 %tid to i64
4064  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
4065  %mul = fmul half %a, %b
4066  %fneg = fneg half %mul
4067  %cmp0 = icmp eq i32 %d, 0
4068  br i1 %cmp0, label %if, label %endif
4069
4070if:
4071  %mul1 = fmul half %fneg, %c
4072  store volatile half %mul1, ptr addrspace(1) %out.gep
4073  br label %endif
4074
4075endif:
4076  store volatile half %mul, ptr addrspace(1) %out.gep
4077  ret void
4078}
4079
4080; --------------------------------------------------------------------------------
4081; inlineasm tests
4082; --------------------------------------------------------------------------------
4083
4084; Can't fold into use, so should fold into source
; %fneg feeds an inline asm "v" operand and the return value; %mul has no
; other user. The VI/GFX11 checks show the negation folded into the multiply
; as a -v1 source modifier, and the SI checks show it on the f32->f16
; conversion of %b. %c and %d are not referenced in the body.
4085define half @v_fneg_inlineasm_f16(half %a, half %b, half %c, i32 %d) #0 {
4086; SI-LABEL: v_fneg_inlineasm_f16:
4087; SI:       ; %bb.0:
4088; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4089; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
4090; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
4091; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4092; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4093; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
4094; SI-NEXT:    v_cvt_f16_f32_e32 v1, v0
4095; SI-NEXT:    ;;#ASMSTART
4096; SI-NEXT:    ; use v1
4097; SI-NEXT:    ;;#ASMEND
4098; SI-NEXT:    s_setpc_b64 s[30:31]
4099;
4100; VI-LABEL: v_fneg_inlineasm_f16:
4101; VI:       ; %bb.0:
4102; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4103; VI-NEXT:    v_mul_f16_e64 v0, v0, -v1
4104; VI-NEXT:    ;;#ASMSTART
4105; VI-NEXT:    ; use v0
4106; VI-NEXT:    ;;#ASMEND
4107; VI-NEXT:    s_setpc_b64 s[30:31]
4108;
4109; GFX11-LABEL: v_fneg_inlineasm_f16:
4110; GFX11:       ; %bb.0:
4111; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4112; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v1
4113; GFX11-NEXT:    ;;#ASMSTART
4114; GFX11-NEXT:    ; use v0
4115; GFX11-NEXT:    ;;#ASMEND
4116; GFX11-NEXT:    s_setpc_b64 s[30:31]
4117  %mul = fmul half %a, %b
4118  %fneg = fneg half %mul
4119  call void asm sideeffect "; use $0", "v"(half %fneg) #0
4120  ret half %fneg
4121}
4122
4123; --------------------------------------------------------------------------------
4124; inlineasm tests
4125; --------------------------------------------------------------------------------
4126
4127; Can't fold into the inline asm use, and the source %mul has a second use.
; %mul is also returned un-negated, so the VI/GFX11 checks show the multiply
; emitted without a modifier and the negation kept as a separate v_xor_b32 of
; the sign bit (0x8000) into the register handed to the asm. On SI the checks
; show the negated asm operand produced by a -v0 modifier on the final
; f32->f16 conversion. %c, %d and %out.gep are not used by the asm operand or
; the return value.
4128define half @v_fneg_inlineasm_multi_use_src_f16(ptr addrspace(1) %out, half %a, half %b, half %c, i32 %d) #0 {
4129; SI-LABEL: v_fneg_inlineasm_multi_use_src_f16:
4130; SI:       ; %bb.0:
4131; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4132; SI-NEXT:    v_cvt_f16_f32_e32 v0, v2
4133; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v3
4134; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4135; SI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
4136; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
4137; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v0
4138; SI-NEXT:    ;;#ASMSTART
4139; SI-NEXT:    ; use v1
4140; SI-NEXT:    ;;#ASMEND
4141; SI-NEXT:    s_setpc_b64 s[30:31]
4142;
4143; VI-LABEL: v_fneg_inlineasm_multi_use_src_f16:
4144; VI:       ; %bb.0:
4145; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4146; VI-NEXT:    v_mul_f16_e32 v0, v2, v3
4147; VI-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
4148; VI-NEXT:    ;;#ASMSTART
4149; VI-NEXT:    ; use v1
4150; VI-NEXT:    ;;#ASMEND
4151; VI-NEXT:    s_setpc_b64 s[30:31]
4152;
4153; GFX11-LABEL: v_fneg_inlineasm_multi_use_src_f16:
4154; GFX11:       ; %bb.0:
4155; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4156; GFX11-NEXT:    v_mul_f16_e32 v0, v2, v3
4157; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4158; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v0
4159; GFX11-NEXT:    ;;#ASMSTART
4160; GFX11-NEXT:    ; use v1
4161; GFX11-NEXT:    ;;#ASMEND
4162; GFX11-NEXT:    s_setpc_b64 s[30:31]
4163  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4164  %tid.ext = sext i32 %tid to i64
4165  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
4166  %mul = fmul half %a, %b
4167  %fneg = fneg half %mul
4168  call void asm sideeffect "; use $0", "v"(half %fneg) #0
4169  ret half %mul
4170}
4171
4172; --------------------------------------------------------------------------------
4173; code size regression tests
4174; --------------------------------------------------------------------------------
4175
4176; There are multiple users of the fneg that must use a VOP3
4177; instruction, so there is no penalty
; Both users of %fneg.a are llvm.fma calls. The checks show the negation
; folded into each fma as a source modifier (-v0 on VI/GFX11, -v3 on SI)
; with no separate sign-flip instruction.
4178define { half, half } @multiuse_fneg_2_vop3_users_f16(half %a, half %b, half %c) #0 {
4179; SI-LABEL: multiuse_fneg_2_vop3_users_f16:
4180; SI:       ; %bb.0:
4181; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4182; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
4183; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4184; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
4185; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4186; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4187; SI-NEXT:    v_cvt_f32_f16_e32 v3, v0
4188; SI-NEXT:    v_fma_f32 v0, -v3, v1, v2
4189; SI-NEXT:    v_fma_f32 v1, -v3, v2, 2.0
4190; SI-NEXT:    s_setpc_b64 s[30:31]
4191;
4192; VI-LABEL: multiuse_fneg_2_vop3_users_f16:
4193; VI:       ; %bb.0:
4194; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4195; VI-NEXT:    v_fma_f16 v3, -v0, v1, v2
4196; VI-NEXT:    v_fma_f16 v1, -v0, v2, 2.0
4197; VI-NEXT:    v_mov_b32_e32 v0, v3
4198; VI-NEXT:    s_setpc_b64 s[30:31]
4199;
4200; GFX11-LABEL: multiuse_fneg_2_vop3_users_f16:
4201; GFX11:       ; %bb.0:
4202; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4203; GFX11-NEXT:    v_fma_f16 v3, -v0, v1, v2
4204; GFX11-NEXT:    v_fma_f16 v1, -v0, v2, 2.0
4205; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4206; GFX11-NEXT:    v_mov_b32_e32 v0, v3
4207; GFX11-NEXT:    s_setpc_b64 s[30:31]
4208  %fneg.a = fneg half %a
4209  %fma0 = call half @llvm.fma.f16(half %fneg.a, half %b, half %c)
4210  %fma1 = call half @llvm.fma.f16(half %fneg.a, half %c, half 2.0)
4211  %insert.0 = insertvalue { half, half } poison, half %fma0, 0
4212  %insert.1 = insertvalue { half, half } %insert.0, half %fma1, 1
4213  ret { half, half } %insert.1
4214}
4215
4216; There are multiple users, but both require using a larger encoding
4217; for the modifier.
; Both users of %fneg.a are fmul instructions. The VI/GFX11 checks show the
; negation folded into each multiply, which is then printed in its _e64 form
; with a -v0 operand. On SI the negation appears once, as a -v0 modifier on
; the f32->f16 conversion of %a, and both multiplies stay in the _e32 form.
4218define { half, half } @multiuse_fneg_2_vop2_users_f16(half %a, half %b, half %c) #0 {
4219; SI-LABEL: multiuse_fneg_2_vop2_users_f16:
4220; SI:       ; %bb.0:
4221; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4222; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4223; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
4224; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
4225; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4226; SI-NEXT:    v_cvt_f32_f16_e32 v3, v0
4227; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4228; SI-NEXT:    v_mul_f32_e32 v0, v3, v1
4229; SI-NEXT:    v_mul_f32_e32 v1, v3, v2
4230; SI-NEXT:    s_setpc_b64 s[30:31]
4231;
4232; VI-LABEL: multiuse_fneg_2_vop2_users_f16:
4233; VI:       ; %bb.0:
4234; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4235; VI-NEXT:    v_mul_f16_e64 v3, -v0, v1
4236; VI-NEXT:    v_mul_f16_e64 v1, -v0, v2
4237; VI-NEXT:    v_mov_b32_e32 v0, v3
4238; VI-NEXT:    s_setpc_b64 s[30:31]
4239;
4240; GFX11-LABEL: multiuse_fneg_2_vop2_users_f16:
4241; GFX11:       ; %bb.0:
4242; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4243; GFX11-NEXT:    v_mul_f16_e64 v3, -v0, v1
4244; GFX11-NEXT:    v_mul_f16_e64 v1, -v0, v2
4245; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
4246; GFX11-NEXT:    v_mov_b32_e32 v0, v3
4247; GFX11-NEXT:    s_setpc_b64 s[30:31]
4248  %fneg.a = fneg half %a
4249  %mul0 = fmul half %fneg.a, %b
4250  %mul1 = fmul half %fneg.a, %c
4251  %insert.0 = insertvalue { half, half } poison, half %mul0, 0
4252  %insert.1 = insertvalue { half, half } %insert.0, half %mul1, 1
4253  ret { half, half } %insert.1
4254}
4255
4256; One user is VOP3 so has no cost to folding the modifier, the other does.
; %fneg.a feeds one llvm.fma call and one fmul. The VI/GFX11 checks show the
; negation folded into both users as a -v2 source modifier (the multiply is
; printed in its _e64 form). On SI it is a -v2 modifier on the f32->f16
; conversion of %a. %out.gep is computed but never used.
4257define { half, half } @multiuse_fneg_vop2_vop3_users_f16(ptr addrspace(1) %out, half %a, half %b, half %c) #0 {
4258; SI-LABEL: multiuse_fneg_vop2_vop3_users_f16:
4259; SI:       ; %bb.0:
4260; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4261; SI-NEXT:    v_cvt_f16_f32_e32 v0, v3
4262; SI-NEXT:    v_cvt_f16_f32_e64 v1, -v2
4263; SI-NEXT:    v_cvt_f16_f32_e32 v2, v4
4264; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4265; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4266; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4267; SI-NEXT:    v_fma_f32 v0, v1, v0, 2.0
4268; SI-NEXT:    v_mul_f32_e32 v1, v1, v2
4269; SI-NEXT:    s_setpc_b64 s[30:31]
4270;
4271; VI-LABEL: multiuse_fneg_vop2_vop3_users_f16:
4272; VI:       ; %bb.0:
4273; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4274; VI-NEXT:    v_fma_f16 v0, -v2, v3, 2.0
4275; VI-NEXT:    v_mul_f16_e64 v1, -v2, v4
4276; VI-NEXT:    s_setpc_b64 s[30:31]
4277;
4278; GFX11-LABEL: multiuse_fneg_vop2_vop3_users_f16:
4279; GFX11:       ; %bb.0:
4280; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4281; GFX11-NEXT:    v_fma_f16 v0, -v2, v3, 2.0
4282; GFX11-NEXT:    v_mul_f16_e64 v1, -v2, v4
4283; GFX11-NEXT:    s_setpc_b64 s[30:31]
4284  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4285  %tid.ext = sext i32 %tid to i64
4286  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
4287
4288  %fneg.a = fneg half %a
4289  %fma0 = call half @llvm.fma.f16(half %fneg.a, half %b, half 2.0)
4290  %mul1 = fmul half %fneg.a, %c
4291
4292  %insert.0 = insertvalue { half, half } poison, half %fma0, 0
4293  %insert.1 = insertvalue { half, half } %insert.0, half %mul1, 1
4294  ret { half, half } %insert.1
4295}
4296
4297; The use of the fneg requires a code size increase, but folding into
4298; the source does not
; The negated fma result feeds two fmul users. In the NSZ runs
; (llc -enable-no-signed-zeros-fp-math, see the RUN lines) the checks show the
; negation pushed into the fma itself (one negated multiplicand and a -2.0
; addend), so both multiplies stay in the _e32 form. In the SAFE runs the fma
; is left unmodified and each multiply carries a -v1 source modifier instead.
; %out.gep is computed but never used.
4299define { half, half } @free_fold_src_code_size_cost_use_f16(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) #0 {
4300; SI-SAFE-LABEL: free_fold_src_code_size_cost_use_f16:
4301; SI-SAFE:       ; %bb.0:
4302; SI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4303; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v0, v3
4304; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v2
4305; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v4
4306; SI-SAFE-NEXT:    v_cvt_f16_f32_e32 v3, v5
4307; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v0, v0
4308; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
4309; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
4310; SI-SAFE-NEXT:    v_cvt_f32_f16_e32 v3, v3
4311; SI-SAFE-NEXT:    v_fma_f32 v1, v1, v0, 2.0
4312; SI-SAFE-NEXT:    v_mul_f32_e64 v0, -v1, v2
4313; SI-SAFE-NEXT:    v_mul_f32_e64 v1, -v1, v3
4314; SI-SAFE-NEXT:    s_setpc_b64 s[30:31]
4315;
4316; SI-NSZ-LABEL: free_fold_src_code_size_cost_use_f16:
4317; SI-NSZ:       ; %bb.0:
4318; SI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4319; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v3
4320; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v2
4321; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v4
4322; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v3, v5
4323; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
4324; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
4325; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
4326; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v3, v3
4327; SI-NSZ-NEXT:    v_fma_f32 v1, v1, -v0, -2.0
4328; SI-NSZ-NEXT:    v_mul_f32_e32 v0, v1, v2
4329; SI-NSZ-NEXT:    v_mul_f32_e32 v1, v1, v3
4330; SI-NSZ-NEXT:    s_setpc_b64 s[30:31]
4331;
4332; VI-SAFE-LABEL: free_fold_src_code_size_cost_use_f16:
4333; VI-SAFE:       ; %bb.0:
4334; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4335; VI-SAFE-NEXT:    v_fma_f16 v1, v2, v3, 2.0
4336; VI-SAFE-NEXT:    v_mul_f16_e64 v0, -v1, v4
4337; VI-SAFE-NEXT:    v_mul_f16_e64 v1, -v1, v5
4338; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
4339;
4340; VI-NSZ-LABEL: free_fold_src_code_size_cost_use_f16:
4341; VI-NSZ:       ; %bb.0:
4342; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4343; VI-NSZ-NEXT:    v_fma_f16 v1, v2, -v3, -2.0
4344; VI-NSZ-NEXT:    v_mul_f16_e32 v0, v1, v4
4345; VI-NSZ-NEXT:    v_mul_f16_e32 v1, v1, v5
4346; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
4347;
4348; GFX11-SAFE-LABEL: free_fold_src_code_size_cost_use_f16:
4349; GFX11-SAFE:       ; %bb.0:
4350; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4351; GFX11-SAFE-NEXT:    v_fma_f16 v1, v2, v3, 2.0
4352; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4353; GFX11-SAFE-NEXT:    v_mul_f16_e64 v0, -v1, v4
4354; GFX11-SAFE-NEXT:    v_mul_f16_e64 v1, -v1, v5
4355; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
4356;
4357; GFX11-NSZ-LABEL: free_fold_src_code_size_cost_use_f16:
4358; GFX11-NSZ:       ; %bb.0:
4359; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4360; GFX11-NSZ-NEXT:    v_fma_f16 v1, v2, -v3, -2.0
4361; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4362; GFX11-NSZ-NEXT:    v_mul_f16_e32 v0, v1, v4
4363; GFX11-NSZ-NEXT:    v_mul_f16_e32 v1, v1, v5
4364; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
4365  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4366  %tid.ext = sext i32 %tid to i64
4367  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
4368
4369  %fma0 = call half @llvm.fma.f16(half %a, half %b, half 2.0)
4370  %fneg.fma0 = fneg half %fma0
4371  %mul1 = fmul half %fneg.fma0, %c
4372  %mul2 = fmul half %fneg.fma0, %d
4373
4374  %insert.0 = insertvalue { half, half } poison, half %mul1, 0
4375  %insert.1 = insertvalue { half, half } %insert.0, half %mul2, 1
4376  ret { half, half } %insert.1
4377}
4378
4379; %trunc.a has one fneg use, but it requires a code size increase and
4380; the fneg can instead be folded for free into the fma.
; The checks show v_trunc emitted in its _e32 form without a modifier and the
; negation applied as a source modifier on the fma (-v0 on VI/GFX11, -v1 on
; SI). %d and %out.gep are not used by the returned value.
4381define half @one_use_cost_to_fold_into_src_f16(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) #0 {
4382; SI-LABEL: one_use_cost_to_fold_into_src_f16:
4383; SI:       ; %bb.0:
4384; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4385; SI-NEXT:    v_cvt_f16_f32_e32 v1, v2
4386; SI-NEXT:    v_cvt_f16_f32_e32 v0, v4
4387; SI-NEXT:    v_cvt_f16_f32_e32 v2, v3
4388; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4389; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4390; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4391; SI-NEXT:    v_trunc_f32_e32 v1, v1
4392; SI-NEXT:    v_fma_f32 v0, -v1, v2, v0
4393; SI-NEXT:    s_setpc_b64 s[30:31]
4394;
4395; VI-LABEL: one_use_cost_to_fold_into_src_f16:
4396; VI:       ; %bb.0:
4397; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4398; VI-NEXT:    v_trunc_f16_e32 v0, v2
4399; VI-NEXT:    v_fma_f16 v0, -v0, v3, v4
4400; VI-NEXT:    s_setpc_b64 s[30:31]
4401;
4402; GFX11-LABEL: one_use_cost_to_fold_into_src_f16:
4403; GFX11:       ; %bb.0:
4404; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4405; GFX11-NEXT:    v_trunc_f16_e32 v0, v2
4406; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4407; GFX11-NEXT:    v_fma_f16 v0, -v0, v3, v4
4408; GFX11-NEXT:    s_setpc_b64 s[30:31]
4409  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4410  %tid.ext = sext i32 %tid to i64
4411  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
4412
4413  %trunc.a = call half @llvm.trunc.f16(half %a)
4414  %trunc.fneg.a = fneg half %trunc.a
4415  %fma0 = call half @llvm.fma.f16(half %trunc.fneg.a, half %b, half %c)
4416  ret half %fma0
4417}
4418
; %trunc.a has two uses: negated as an fma operand and un-negated as an fmul
; operand. The checks show v_trunc emitted without a modifier, the negation
; applied as a -v1 source modifier on the fma, and the multiply reading v1
; directly. %out.gep is computed but never used.
4419define { half, half } @multi_use_cost_to_fold_into_src(ptr addrspace(1) %out, half %a, half %b, half %c, half %d) #0 {
4420; SI-LABEL: multi_use_cost_to_fold_into_src:
4421; SI:       ; %bb.0:
4422; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4423; SI-NEXT:    v_cvt_f16_f32_e32 v1, v2
4424; SI-NEXT:    v_cvt_f16_f32_e32 v0, v4
4425; SI-NEXT:    v_cvt_f16_f32_e32 v2, v3
4426; SI-NEXT:    v_cvt_f16_f32_e32 v3, v5
4427; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4428; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4429; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4430; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
4431; SI-NEXT:    v_trunc_f32_e32 v1, v1
4432; SI-NEXT:    v_fma_f32 v0, -v1, v2, v0
4433; SI-NEXT:    v_mul_f32_e32 v1, v1, v3
4434; SI-NEXT:    s_setpc_b64 s[30:31]
4435;
4436; VI-LABEL: multi_use_cost_to_fold_into_src:
4437; VI:       ; %bb.0:
4438; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4439; VI-NEXT:    v_trunc_f16_e32 v1, v2
4440; VI-NEXT:    v_fma_f16 v0, -v1, v3, v4
4441; VI-NEXT:    v_mul_f16_e32 v1, v1, v5
4442; VI-NEXT:    s_setpc_b64 s[30:31]
4443;
4444; GFX11-LABEL: multi_use_cost_to_fold_into_src:
4445; GFX11:       ; %bb.0:
4446; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4447; GFX11-NEXT:    v_trunc_f16_e32 v1, v2
4448; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4449; GFX11-NEXT:    v_fma_f16 v0, -v1, v3, v4
4450; GFX11-NEXT:    v_mul_f16_e32 v1, v1, v5
4451; GFX11-NEXT:    s_setpc_b64 s[30:31]
4452  %tid = call i32 @llvm.amdgcn.workitem.id.x()
4453  %tid.ext = sext i32 %tid to i64
4454  %out.gep = getelementptr inbounds half, ptr addrspace(1) %out, i64 %tid.ext
4455  %trunc.a = call half @llvm.trunc.f16(half %a)
4456  %trunc.fneg.a = fneg half %trunc.a
4457  %fma0 = call half @llvm.fma.f16(half %trunc.fneg.a, half %b, half %c)
4458  %mul1 = fmul half %trunc.a, %d
4459  %insert.0 = insertvalue { half, half } poison, half %fma0, 0
4460  %insert.1 = insertvalue { half, half } %insert.0, half %mul1, 1
4461  ret { half, half } %insert.1
4462}
4463
4464; The AMDGPU combine to pull fneg into the FMA operands was being
4465; undone by the generic combine to pull the fneg out of the fma if
4466; !isFNegFree. We were reporting false for v2f32 even though it will
4467; be split into f32 where it will be free.
; NOTE(review): this function operates on <2 x half>, not v2f32; the wording
; above was presumably carried over from an f32 variant of this test - confirm.
4468define <2 x half> @fneg_fma_fneg_dagcombine_loop(<2 x half> %arg, <2 x half> %arg1, <2 x half> %arg2) #0 {
4469; SI-LABEL: fneg_fma_fneg_dagcombine_loop:
4470; SI:       ; %bb.0: ; %bb
4471; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4472; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
4473; SI-NEXT:    v_cvt_f16_f32_e32 v4, v4
4474; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
4475; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
4476; SI-NEXT:    v_lshlrev_b32_e32 v6, 16, v5
4477; SI-NEXT:    v_or_b32_e32 v6, v4, v6
4478; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
4479; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4480; SI-NEXT:    v_xor_b32_e32 v6, 0x80008000, v6
4481; SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v6
4482; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4483; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
4484; SI-NEXT:    v_cvt_f32_f16_e32 v7, v7
4485; SI-NEXT:    v_cvt_f32_f16_e32 v6, v6
4486; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4487; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4488; SI-NEXT:    v_cvt_f32_f16_e32 v5, v5
4489; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
4490; SI-NEXT:    s_brev_b32 s4, 1
4491; SI-NEXT:    v_fma_f32 v3, v3, v7, s4
4492; SI-NEXT:    v_fma_f32 v2, v2, v6, s4
4493; SI-NEXT:    v_sub_f32_e32 v1, v3, v1
4494; SI-NEXT:    v_sub_f32_e32 v0, v2, v0
4495; SI-NEXT:    v_mul_f32_e32 v0, v0, v4
4496; SI-NEXT:    v_mul_f32_e32 v1, v1, v5
4497; SI-NEXT:    s_setpc_b64 s[30:31]
4498;
4499; VI-LABEL: fneg_fma_fneg_dagcombine_loop:
4500; VI:       ; %bb.0: ; %bb
4501; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4502; VI-NEXT:    s_mov_b32 s4, 0x8000
4503; VI-NEXT:    v_fma_f16 v3, v1, -v2, s4
4504; VI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
4505; VI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
4506; VI-NEXT:    v_fma_f16 v1, v1, -v4, s4
4507; VI-NEXT:    v_sub_f16_e32 v3, v3, v0
4508; VI-NEXT:    v_sub_f16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
4509; VI-NEXT:    v_mul_f16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
4510; VI-NEXT:    v_mul_f16_e32 v1, v3, v2
4511; VI-NEXT:    v_or_b32_e32 v0, v1, v0
4512; VI-NEXT:    s_setpc_b64 s[30:31]
4513;
4514; GFX11-LABEL: fneg_fma_fneg_dagcombine_loop:
4515; GFX11:       ; %bb.0: ; %bb
4516; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4517; GFX11-NEXT:    v_pk_fma_f16 v1, v1, v2, 0x8000 op_sel_hi:[1,1,0] neg_lo:[0,1,0] neg_hi:[0,1,0]
4518; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
4519; GFX11-NEXT:    v_pk_add_f16 v0, v1, v0 neg_lo:[0,1] neg_hi:[0,1]
4520; GFX11-NEXT:    v_pk_mul_f16 v0, v0, v2
4521; GFX11-NEXT:    s_setpc_b64 s[30:31]
4522bb:
; Computes fneg(fma(%arg1, %arg2, 0) + %arg) * %arg2; every operation except
; the fneg itself carries the fast flag.
4523  %i3 = call fast <2 x half> @llvm.fma.v2f16(<2 x half> %arg1, <2 x half> %arg2, <2 x half> zeroinitializer)
4524  %i4 = fadd fast <2 x half> %i3, %arg
4525  %i5 = fneg <2 x half> %i4
4526  %i6 = fmul fast <2 x half> %i5, %arg2
4527  ret <2 x half> %i6
4528}
4529
4530; This expects denormal flushing, so can't turn this fmul into fneg
4531; TODO: Keeping this as fmul saves encoding size
; NOTE(review): the checks below expect a single multiply whose first source is
; negated (via a neg modifier, or via a negated v_cvt_f16_f32 source in the
; hawaii output) and no separate multiply by -1.0 - confirm the statement above
; still describes the f16 output.
4532define half @nnan_fmul_neg1_to_fneg(half %x, half %y) #0 {
4533; SI-LABEL: nnan_fmul_neg1_to_fneg:
4534; SI:       ; %bb.0:
4535; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4536; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4537; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
4538; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4539; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4540; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
4541; SI-NEXT:    s_setpc_b64 s[30:31]
4542;
4543; VI-LABEL: nnan_fmul_neg1_to_fneg:
4544; VI:       ; %bb.0:
4545; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4546; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
4547; VI-NEXT:    s_setpc_b64 s[30:31]
4548;
4549; GFX11-LABEL: nnan_fmul_neg1_to_fneg:
4550; GFX11:       ; %bb.0:
4551; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4552; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
4553; GFX11-NEXT:    s_setpc_b64 s[30:31]
; The nnan flag is on the second multiply (named %add although it is an fmul),
; not on the multiply by -1.0.
4554  %mul = fmul half %x, -1.0
4555  %add = fmul nnan half %mul, %y
4556  ret half %add
4557}
4558
4559; It's legal to turn this fmul into an fneg since denormals are
4560; preserved and we know an snan can't happen from the flag.
4561define half @denormal_fmul_neg1_to_fneg(half %x, half %y) {
4562; SI-LABEL: denormal_fmul_neg1_to_fneg:
4563; SI:       ; %bb.0:
4564; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4565; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4566; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
4567; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4568; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4569; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
4570; SI-NEXT:    s_setpc_b64 s[30:31]
4571;
4572; VI-LABEL: denormal_fmul_neg1_to_fneg:
4573; VI:       ; %bb.0:
4574; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4575; VI-NEXT:    v_mul_f16_e64 v0, -v0, v1
4576; VI-NEXT:    s_setpc_b64 s[30:31]
4577;
4578; GFX11-LABEL: denormal_fmul_neg1_to_fneg:
4579; GFX11:       ; %bb.0:
4580; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4581; GFX11-NEXT:    v_mul_f16_e64 v0, -v0, v1
4582; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Here the nnan flag is on the multiply by -1.0 itself, and the function has no
; attribute group attached.
4583  %mul = fmul nnan half %x, -1.0
4584  %add = fmul half %mul, %y
4585  ret half %add
4586}
4587
4588; The source of the multiply by -1.0 is an fmul result (%canonical), so we know the source can't be an snan.
4589define half @denorm_snan_fmul_neg1_to_fneg(half %x, half %y) {
4590; SI-LABEL: denorm_snan_fmul_neg1_to_fneg:
4591; SI:       ; %bb.0:
4592; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4593; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
4594; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4595; SI-NEXT:    v_cvt_f32_f16_e32 v2, v0
4596; SI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
4597; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4598; SI-NEXT:    v_mul_f32_e32 v0, v2, v0
4599; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
4600; SI-NEXT:    s_setpc_b64 s[30:31]
4601;
4602; VI-LABEL: denorm_snan_fmul_neg1_to_fneg:
4603; VI:       ; %bb.0:
4604; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4605; VI-NEXT:    v_mul_f16_e64 v0, v0, -v0
4606; VI-NEXT:    v_mul_f16_e32 v0, v0, v1
4607; VI-NEXT:    s_setpc_b64 s[30:31]
4608;
4609; GFX11-LABEL: denorm_snan_fmul_neg1_to_fneg:
4610; GFX11:       ; %bb.0:
4611; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4612; GFX11-NEXT:    v_mul_f16_e64 v0, v0, -v0
4613; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4614; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
4615; GFX11-NEXT:    s_setpc_b64 s[30:31]
; %canonical = %x * %x; the checks expect the negation to be folded into one
; source of that first multiply, leaving two multiplies in total.
4616  %canonical = fmul half %x, %x
4617  %mul = fmul half %canonical, -1.0
4618  %add = fmul half %mul, %y
4619  ret half %add
4620}
4621
; Same multiply-by--1.0 pattern, but the source is first passed through
; llvm.canonicalize. The fiji and gfx1100 checks expect a v_max_f16 with both
; sources negated followed by the multiply by %y (presumably the canonicalize
; with the negation folded in - confirm); the hawaii checks expect the negation
; on the v_cvt_f16_f32 source instead.
4622define half @flush_snan_fmul_neg1_to_fneg(half %x, half %y) #0 {
4623; SI-LABEL: flush_snan_fmul_neg1_to_fneg:
4624; SI:       ; %bb.0:
4625; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4626; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4627; SI-NEXT:    v_cvt_f16_f32_e64 v0, -v0
4628; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4629; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4630; SI-NEXT:    v_mul_f32_e32 v0, v0, v1
4631; SI-NEXT:    s_setpc_b64 s[30:31]
4632;
4633; VI-LABEL: flush_snan_fmul_neg1_to_fneg:
4634; VI:       ; %bb.0:
4635; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4636; VI-NEXT:    v_max_f16_e64 v0, -v0, -v0
4637; VI-NEXT:    v_mul_f16_e32 v0, v0, v1
4638; VI-NEXT:    s_setpc_b64 s[30:31]
4639;
4640; GFX11-LABEL: flush_snan_fmul_neg1_to_fneg:
4641; GFX11:       ; %bb.0:
4642; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4643; GFX11-NEXT:    v_max_f16_e64 v0, -v0, -v0
4644; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4645; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v1
4646; GFX11-NEXT:    s_setpc_b64 s[30:31]
4647  %quiet = call half @llvm.canonicalize.f16(half %x)
4648  %mul = fmul half %quiet, -1.0
4649  %add = fmul half %mul, %y
4650  ret half %add
4651}
4652
; Both arms of the select are fneg'd before being added to %z. The checks
; expect the select to operate on the un-negated inputs and the fadd to become
; a subtract of the selected value from %z.
4653define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) {
4654; SI-LABEL: fadd_select_fneg_fneg_f16:
4655; SI:       ; %bb.0:
4656; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4657; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4658; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
4659; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
4660; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4661; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4662; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
4663; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
4664; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4665; SI-NEXT:    v_sub_f32_e32 v0, v3, v0
4666; SI-NEXT:    s_setpc_b64 s[30:31]
4667;
4668; VI-LABEL: fadd_select_fneg_fneg_f16:
4669; VI:       ; %bb.0:
4670; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4671; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4672; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4673; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
4674; VI-NEXT:    s_setpc_b64 s[30:31]
4675;
4676; GFX11-LABEL: fadd_select_fneg_fneg_f16:
4677; GFX11:       ; %bb.0:
4678; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4679; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
4680; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
4681; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4682; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
4683; GFX11-NEXT:    s_setpc_b64 s[30:31]
; Selects -%x when %arg0 == 0 and -%y otherwise, then adds %z.
4684  %cmp = icmp eq i32 %arg0, 0
4685  %neg.x = fneg half %x
4686  %neg.y  = fneg half %y
4687  %select = select i1 %cmp, half %neg.x, half %neg.y
4688  %add = fadd half %select, %z
4689  ret half %add
4690}
4691
4692; FIXME: Terrible code for SI
; <2 x half> version of fadd_select_fneg_fneg_f16. The gfx1100 checks expect a
; single v_pk_add_f16 with neg_lo/neg_hi set on the selected operand; the fiji
; checks expect two v_sub_f16 (one of them SDWA) combined with v_or_b32.
4693define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
4694; SI-LABEL: fadd_select_fneg_fneg_v2f16:
4695; SI:       ; %bb.0:
4696; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4697; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
4698; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
4699; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
4700; SI-NEXT:    v_cvt_f16_f32_e32 v5, v5
4701; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4702; SI-NEXT:    v_or_b32_e32 v1, v1, v2
4703; SI-NEXT:    v_cvt_f16_f32_e32 v2, v4
4704; SI-NEXT:    v_cvt_f16_f32_e32 v4, v6
4705; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4706; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
4707; SI-NEXT:    v_or_b32_e32 v2, v3, v2
4708; SI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4709; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
4710; SI-NEXT:    v_cvt_f32_f16_e32 v3, v4
4711; SI-NEXT:    v_cvt_f32_f16_e32 v4, v5
4712; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
4713; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
4714; SI-NEXT:    v_sub_f32_e32 v0, v4, v0
4715; SI-NEXT:    v_sub_f32_e32 v1, v3, v1
4716; SI-NEXT:    s_setpc_b64 s[30:31]
4717;
4718; VI-LABEL: fadd_select_fneg_fneg_v2f16:
4719; VI:       ; %bb.0:
4720; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4721; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
4722; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
4723; VI-NEXT:    v_sub_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
4724; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
4725; VI-NEXT:    v_or_b32_e32 v0, v0, v1
4726; VI-NEXT:    s_setpc_b64 s[30:31]
4727;
4728; GFX11-LABEL: fadd_select_fneg_fneg_v2f16:
4729; GFX11:       ; %bb.0:
4730; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4731; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
4732; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
4733; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
4734; GFX11-NEXT:    v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
4735; GFX11-NEXT:    s_setpc_b64 s[30:31]
; A single scalar i1 condition selects between the two negated vectors.
4736  %cmp = icmp eq i32 %arg0, 0
4737  %neg.x = fneg <2 x half> %x
4738  %neg.y = fneg <2 x half> %y
4739  %select = select i1 %cmp, <2 x half> %neg.x, <2 x half> %neg.y
4740  %add = fadd <2 x half> %select, %z
4741  ret <2 x half> %add
4742}
4743
; Intrinsic declarations. All carry attribute group #1 except the
; llvm.fma.v2f16 declaration, which has no attribute group.
4744declare i32 @llvm.amdgcn.workitem.id.x() #1
4745declare half @llvm.sin.f16(half) #1
4746declare half @llvm.trunc.f16(half) #1
4747declare half @llvm.round.f16(half) #1
4748declare half @llvm.rint.f16(half) #1
4749declare half @llvm.nearbyint.f16(half) #1
4750declare half @llvm.roundeven.f16(half) #1
4751declare half @llvm.canonicalize.f16(half) #1
4752declare half @llvm.minnum.f16(half, half) #1
4753declare half @llvm.maxnum.f16(half, half) #1
4754declare half @llvm.fma.f16(half, half, half) #1
4755declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
4756declare half @llvm.fmuladd.f16(half, half, half) #1
4757declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
4758
; Attribute groups.
;   #0 - nounwind, f32 denormal mode set to preserve-sign for output and input.
;   #1 - nounwind readnone (attached to the intrinsic declarations above).
;   #2 - nounwind with unsafe-fp-math enabled.
;   #3 - nounwind with no-signed-zeros-fp-math enabled.
;   #4 - same as #0, plus amdgpu-ieee set to false.
4759attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
4760attributes #1 = { nounwind readnone }
4761attributes #2 = { nounwind "unsafe-fp-math"="true" }
4762attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
4763attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
4764
4764