xref: /llvm-project/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
3; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE %s
5
6; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
7; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
8; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ %s
9
; fadd of a select whose two arms are fabs(x) and fabs(y).
; The expected output for every target selects the raw x/y values and applies
; a single |src| modifier on the add. The hawaii output additionally passes
; each half argument through v_cvt_f16_f32/v_cvt_f32_f16 and adds in f32.
10define half @add_select_fabs_fabs_f16(i32 %c, half %x, half %y, half %z) {
11; CI-LABEL: add_select_fabs_fabs_f16:
12; CI:       ; %bb.0:
13; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
15; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
16; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
17; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
18; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
19; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
20; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
21; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
22; CI-NEXT:    v_add_f32_e64 v0, |v0|, v3
23; CI-NEXT:    s_setpc_b64 s[30:31]
24;
25; VI-LABEL: add_select_fabs_fabs_f16:
26; VI:       ; %bb.0:
27; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
29; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
30; VI-NEXT:    v_add_f16_e64 v0, |v0|, v3
31; VI-NEXT:    s_setpc_b64 s[30:31]
32;
33; GFX11-LABEL: add_select_fabs_fabs_f16:
34; GFX11:       ; %bb.0:
35; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
37; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
38; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
39; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v3
40; GFX11-NEXT:    s_setpc_b64 s[30:31]
41  %cmp = icmp eq i32 %c, 0
42  %fabs.x = call half @llvm.fabs.f16(half %x)
43  %fabs.y = call half @llvm.fabs.f16(half %y)
44  %select = select i1 %cmp, half %fabs.x, half %fabs.y
45  %add = fadd half %select, %z
46  ret half %add
47}
48
; Same fabs/fabs select as the single-use case, but fabs(x) (the true arm) has
; a second arithmetic use: add1 = fabs(x) + w. Note the argument order here is
; (..., %w, %z), so %w arrives in v3 and %z in v4.
; The expected output still selects the raw x/y and uses a |src| modifier on
; both adds; no separate masking instruction is expected.
49define { half, half } @add_select_multi_use_lhs_fabs_fabs_f16(i32 %c, half %x, half %y, half %w, half %z) {
50; CI-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
51; CI:       ; %bb.0:
52; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
54; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
55; CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
56; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
57; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
58; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
59; CI-NEXT:    v_cvt_f32_f16_e32 v4, v4
60; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
61; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
62; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
63; CI-NEXT:    v_add_f32_e64 v0, |v0|, v4
64; CI-NEXT:    v_add_f32_e64 v1, |v1|, v3
65; CI-NEXT:    s_setpc_b64 s[30:31]
66;
67; VI-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
68; VI:       ; %bb.0:
69; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
70; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
71; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
72; VI-NEXT:    v_add_f16_e64 v0, |v0|, v4
73; VI-NEXT:    v_add_f16_e64 v1, |v1|, v3
74; VI-NEXT:    s_setpc_b64 s[30:31]
75;
76; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_f16:
77; GFX11:       ; %bb.0:
78; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
80; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
81; GFX11-NEXT:    v_add_f16_e64 v1, |v1|, v3
82; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
83; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v4
84; GFX11-NEXT:    s_setpc_b64 s[30:31]
85  %cmp = icmp eq i32 %c, 0
86  %fabs.x = call half @llvm.fabs.f16(half %x)
87  %fabs.y = call half @llvm.fabs.f16(half %y)
88  %select = select i1 %cmp, half %fabs.x, half %fabs.y
89  %add0 = fadd half %select, %z
90  %add1 = fadd half %fabs.x, %w
91  %insert.0 = insertvalue { half, half } poison, half %add0, 0
92  %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1
93  ret { half, half } %insert.1
94}
95
; fabs/fabs select where fabs(x) is additionally returned as-is in the second
; struct element (despite "store" in the name, the extra use in this function
; is an insertvalue into the return value, not a memory store).
; Because the second use cannot take a source modifier, the expected output
; materializes fabs(x) with an AND mask (0x7fff on fiji/gfx1100, 0x7fffffff on
; the f32-promoted hawaii path), while the select still takes the raw x and the
; add keeps its |src| modifier.
96define { half, half } @add_select_multi_store_use_lhs_fabs_fabs_f16(i32 %c, half %x, half %y, half %z) {
97; CI-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
98; CI:       ; %bb.0:
99; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
101; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
102; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
103; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
104; CI-NEXT:    v_cvt_f32_f16_e32 v4, v1
105; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
106; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
107; CI-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v4
108; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
109; CI-NEXT:    v_add_f32_e64 v0, |v0|, v3
110; CI-NEXT:    s_setpc_b64 s[30:31]
111;
112; VI-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
113; VI:       ; %bb.0:
114; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
116; VI-NEXT:    v_and_b32_e32 v4, 0x7fff, v1
117; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
118; VI-NEXT:    v_add_f16_e64 v0, |v0|, v3
119; VI-NEXT:    v_mov_b32_e32 v1, v4
120; VI-NEXT:    s_setpc_b64 s[30:31]
121;
122; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16:
123; GFX11:       ; %bb.0:
124; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
126; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
127; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
128; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
129; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v3
130; GFX11-NEXT:    s_setpc_b64 s[30:31]
131  %cmp = icmp eq i32 %c, 0
132  %fabs.x = call half @llvm.fabs.f16(half %x)
133  %fabs.y = call half @llvm.fabs.f16(half %y)
134  %select = select i1 %cmp, half %fabs.x, half %fabs.y
135  %add0 = fadd half %select, %z
136  %insert.0 = insertvalue { half, half } poison, half %add0, 0
137  %insert.1 = insertvalue { half, half } %insert.0, half %fabs.x, 1
138  ret { half, half } %insert.1
139}
140
; fabs/fabs select where fabs(y) (the false arm) has a second arithmetic use:
; add1 = fabs(y) + w.
; The expected output selects the raw x/y and uses a |src| modifier on both
; adds (|v0| for the select result, |v2| for y).
141define { half, half } @add_select_multi_use_rhs_fabs_fabs_f16(i32 %c, half %x, half %y, half %z, half %w) {
142; CI-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
143; CI:       ; %bb.0:
144; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
146; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
147; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
148; CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
149; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
150; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
151; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
152; CI-NEXT:    v_cvt_f32_f16_e32 v4, v4
153; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
154; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
155; CI-NEXT:    v_add_f32_e64 v0, |v0|, v3
156; CI-NEXT:    v_add_f32_e64 v1, |v2|, v4
157; CI-NEXT:    s_setpc_b64 s[30:31]
158;
159; VI-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
160; VI:       ; %bb.0:
161; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
163; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
164; VI-NEXT:    v_add_f16_e64 v0, |v0|, v3
165; VI-NEXT:    v_add_f16_e64 v1, |v2|, v4
166; VI-NEXT:    s_setpc_b64 s[30:31]
167;
168; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_f16:
169; GFX11:       ; %bb.0:
170; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
172; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
173; GFX11-NEXT:    v_add_f16_e64 v1, |v2|, v4
174; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
175; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v3
176; GFX11-NEXT:    s_setpc_b64 s[30:31]
177  %cmp = icmp eq i32 %c, 0
178  %fabs.x = call half @llvm.fabs.f16(half %x)
179  %fabs.y = call half @llvm.fabs.f16(half %y)
180  %select = select i1 %cmp, half %fabs.x, half %fabs.y
181  %add0 = fadd half %select, %z
182  %add1 = fadd half %fabs.y, %w
183  %insert.0 = insertvalue { half, half } poison, half %add0, 0
184  %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1
185  ret { half, half } %insert.1
186}
187
; Only the true arm of the select is fabs(x); the false arm is the plain
; variable y, so the fabs cannot be moved past the select.
; The expected output applies fabs to x before the select (AND with 0x7fff on
; fiji/gfx1100, |v1| on the f16->f32 convert on hawaii) and the final add
; carries no source modifier.
188define half @add_select_fabs_var_f16(i32 %c, half %x, half %y, half %z) {
189; CI-LABEL: add_select_fabs_var_f16:
190; CI:       ; %bb.0:
191; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
192; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
193; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
194; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
195; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
196; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
197; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
198; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
199; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
200; CI-NEXT:    v_add_f32_e32 v0, v0, v3
201; CI-NEXT:    s_setpc_b64 s[30:31]
202;
203; VI-LABEL: add_select_fabs_var_f16:
204; VI:       ; %bb.0:
205; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
207; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
208; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
209; VI-NEXT:    v_add_f16_e32 v0, v0, v3
210; VI-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX11-LABEL: add_select_fabs_var_f16:
213; GFX11:       ; %bb.0:
214; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
216; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
217; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
218; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
219; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
220; GFX11-NEXT:    s_setpc_b64 s[30:31]
221  %cmp = icmp eq i32 %c, 0
222  %fabs.x = call half @llvm.fabs.f16(half %x)
223  %select = select i1 %cmp, half %fabs.x, half %y
224  %add = fadd half %select, %z
225  ret half %add
226}
227
; Select between fabs(x) and the negative constant -1.0 (0xbc00 as half).
; The expected output applies fabs to x before the select and keeps the
; constant negative: -1.0 inline on hawaii, 0xbc00 via v_mov on fiji, and
; 0xbc00 as a literal cndmask operand on gfx1100. The add has no modifier.
228define half @add_select_fabs_negk_f16(i32 %c, half %x, half %y) {
229; CI-LABEL: add_select_fabs_negk_f16:
230; CI:       ; %bb.0:
231; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
233; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
234; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
235; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
236; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
237; CI-NEXT:    v_cndmask_b32_e32 v0, -1.0, v1, vcc
238; CI-NEXT:    v_add_f32_e32 v0, v0, v2
239; CI-NEXT:    s_setpc_b64 s[30:31]
240;
241; VI-LABEL: add_select_fabs_negk_f16:
242; VI:       ; %bb.0:
243; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
245; VI-NEXT:    v_mov_b32_e32 v3, 0xbc00
246; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
247; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
248; VI-NEXT:    v_add_f16_e32 v0, v0, v2
249; VI-NEXT:    s_setpc_b64 s[30:31]
250;
251; GFX11-LABEL: add_select_fabs_negk_f16:
252; GFX11:       ; %bb.0:
253; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
255; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
256; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
257; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
258; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
259; GFX11-NEXT:    s_setpc_b64 s[30:31]
260  %cmp = icmp eq i32 %c, 0
261  %fabs = call half @llvm.fabs.f16(half %x)
262  %select = select i1 %cmp, half %fabs, half -1.0
263  %add = fadd half %select, %y
264  ret half %add
265}
266
267; FIXME: fabs should fold away
; Here fabs is applied to the result of a select between two negative
; constants (-2.0 / -1.0), so it could in principle be folded into the
; constants. The expected output below records the current, unfolded
; behaviour: the select still picks between the negative constants
; (0xc000 / 0xbc00 as half) and the add applies |v0|.
268define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) {
269; CI-LABEL: add_select_fabs_negk_negk_f16:
270; CI:       ; %bb.0:
271; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
273; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
274; CI-NEXT:    v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
275; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
276; CI-NEXT:    v_add_f32_e64 v0, |v0|, v1
277; CI-NEXT:    s_setpc_b64 s[30:31]
278;
279; VI-LABEL: add_select_fabs_negk_negk_f16:
280; VI:       ; %bb.0:
281; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; VI-NEXT:    v_mov_b32_e32 v2, 0xbc00
283; VI-NEXT:    v_mov_b32_e32 v3, 0xc000
284; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
285; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
286; VI-NEXT:    v_add_f16_e64 v0, |v0|, v1
287; VI-NEXT:    s_setpc_b64 s[30:31]
288;
289; GFX11-LABEL: add_select_fabs_negk_negk_f16:
290; GFX11:       ; %bb.0:
291; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; GFX11-NEXT:    v_mov_b32_e32 v2, 0xc000
293; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
294; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
295; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
296; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v1
297; GFX11-NEXT:    s_setpc_b64 s[30:31]
298  %cmp = icmp eq i32 %c, 0
299  %select = select i1 %cmp, half -2.0, half -1.0
300  %fabs = call half @llvm.fabs.f16(half %select)
301  %add = fadd half %fabs, %x
302  ret half %add
303}
304
; Baseline with no fabs/fneg at all: select between the positive constants
; 2.0 and 1.0 (0x4000 / 0x3c00 as half), then fadd with x.
; The expected output is a plain cndmask of the two constants followed by an
; add without any source modifier.
305define half @add_select_posk_posk_f16(i32 %c, half %x) {
306; CI-LABEL: add_select_posk_posk_f16:
307; CI:       ; %bb.0:
308; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
310; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
311; CI-NEXT:    v_cndmask_b32_e64 v0, 1.0, 2.0, vcc
312; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
313; CI-NEXT:    v_add_f32_e32 v0, v0, v1
314; CI-NEXT:    s_setpc_b64 s[30:31]
315;
316; VI-LABEL: add_select_posk_posk_f16:
317; VI:       ; %bb.0:
318; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; VI-NEXT:    v_mov_b32_e32 v2, 0x3c00
320; VI-NEXT:    v_mov_b32_e32 v3, 0x4000
321; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
322; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
323; VI-NEXT:    v_add_f16_e32 v0, v0, v1
324; VI-NEXT:    s_setpc_b64 s[30:31]
325;
326; GFX11-LABEL: add_select_posk_posk_f16:
327; GFX11:       ; %bb.0:
328; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
329; GFX11-NEXT:    v_mov_b32_e32 v2, 0x4000
330; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
331; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
332; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
333; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
334; GFX11-NEXT:    s_setpc_b64 s[30:31]
335  %cmp = icmp eq i32 %c, 0
336  %select = select i1 %cmp, half 2.0, half 1.0
337  %add = fadd half %select, %x
338  ret half %add
339}
340
; Operand-swapped variant of the fabs/negative-constant select: the constant
; -1.0 is the true arm and fabs(x) the false arm.
; The expected output applies fabs to x before the select, uses v_cmp_ne
; (the inverse of the IR's icmp eq) so the constant stays the first cndmask
; source, and the add has no source modifier.
341define half @add_select_negk_fabs_f16(i32 %c, half %x, half %y) {
342; CI-LABEL: add_select_negk_fabs_f16:
343; CI:       ; %bb.0:
344; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
346; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
347; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
348; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
349; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
350; CI-NEXT:    v_cndmask_b32_e32 v0, -1.0, v1, vcc
351; CI-NEXT:    v_add_f32_e32 v0, v0, v2
352; CI-NEXT:    s_setpc_b64 s[30:31]
353;
354; VI-LABEL: add_select_negk_fabs_f16:
355; VI:       ; %bb.0:
356; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
357; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
358; VI-NEXT:    v_mov_b32_e32 v3, 0xbc00
359; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
360; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
361; VI-NEXT:    v_add_f16_e32 v0, v0, v2
362; VI-NEXT:    s_setpc_b64 s[30:31]
363;
364; GFX11-LABEL: add_select_negk_fabs_f16:
365; GFX11:       ; %bb.0:
366; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
367; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
368; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
369; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
370; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
371; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
372; GFX11-NEXT:    s_setpc_b64 s[30:31]
373  %cmp = icmp eq i32 %c, 0
374  %fabs = call half @llvm.fabs.f16(half %x)
375  %select = select i1 %cmp, half -1.0, half %fabs
376  %add = fadd half %select, %y
377  ret half %add
378}
379
; Like the -1.0 / fabs(x) select, but with the constant -1024.0
; (0xe400 as half, 0xc4800000 as float).
; In the expected output hawaii and fiji first move the constant into v3 with
; v_mov_b32, while gfx1100 uses 0xe400 directly as a cndmask literal. fabs is
; applied to x before the select and the add has no source modifier.
380define half @add_select_negliteralk_fabs_f16(i32 %c, half %x, half %y) {
381; CI-LABEL: add_select_negliteralk_fabs_f16:
382; CI:       ; %bb.0:
383; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
385; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
386; CI-NEXT:    v_mov_b32_e32 v3, 0xc4800000
387; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
388; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
389; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
390; CI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
391; CI-NEXT:    v_add_f32_e32 v0, v0, v2
392; CI-NEXT:    s_setpc_b64 s[30:31]
393;
394; VI-LABEL: add_select_negliteralk_fabs_f16:
395; VI:       ; %bb.0:
396; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
398; VI-NEXT:    v_mov_b32_e32 v3, 0xe400
399; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
400; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
401; VI-NEXT:    v_add_f16_e32 v0, v0, v2
402; VI-NEXT:    s_setpc_b64 s[30:31]
403;
404; GFX11-LABEL: add_select_negliteralk_fabs_f16:
405; GFX11:       ; %bb.0:
406; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
407; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
408; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
409; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
410; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo
411; GFX11-NEXT:    v_add_f16_e32 v0, v0, v2
412; GFX11-NEXT:    s_setpc_b64 s[30:31]
413  %cmp = icmp eq i32 %c, 0
414  %fabs = call half @llvm.fabs.f16(half %x)
415  %select = select i1 %cmp, half -1024.0, half %fabs
416  %add = fadd half %select, %y
417  ret half %add
418}
419
; Select between fabs(x) and the positive constant 1.0 (0x3c00 as half).
; Since fabs leaves +1.0 unchanged, the fabs can be applied after the select.
; The expected output reflects that: the select takes the raw x and the
; constant, and the add applies |v0| to the selected value.
420define half @add_select_fabs_posk_f16(i32 %c, half %x, half %y) {
421; CI-LABEL: add_select_fabs_posk_f16:
422; CI:       ; %bb.0:
423; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
425; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
426; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
427; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
428; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
429; CI-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
430; CI-NEXT:    v_add_f32_e64 v0, |v0|, v2
431; CI-NEXT:    s_setpc_b64 s[30:31]
432;
433; VI-LABEL: add_select_fabs_posk_f16:
434; VI:       ; %bb.0:
435; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436; VI-NEXT:    v_mov_b32_e32 v3, 0x3c00
437; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
438; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
439; VI-NEXT:    v_add_f16_e64 v0, |v0|, v2
440; VI-NEXT:    s_setpc_b64 s[30:31]
441;
442; GFX11-LABEL: add_select_fabs_posk_f16:
443; GFX11:       ; %bb.0:
444; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
445; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
446; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
447; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
448; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v2
449; GFX11-NEXT:    s_setpc_b64 s[30:31]
450  %cmp = icmp eq i32 %c, 0
451  %fabs = call half @llvm.fabs.f16(half %x)
452  %select = select i1 %cmp, half %fabs, half 1.0
453  %add = fadd half %select, %y
454  ret half %add
455}
456
; Operand-swapped variant of the fabs/positive-constant select: 1.0 is the
; true arm and fabs(x) the false arm.
; The expected output uses v_cmp_ne (the inverse of the IR's icmp eq) with the
; constant as the first cndmask source, selects the raw x, and applies |v0|
; on the add.
457define half @add_select_posk_fabs_f16(i32 %c, half %x, half %y) {
458; CI-LABEL: add_select_posk_fabs_f16:
459; CI:       ; %bb.0:
460; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
461; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
462; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
463; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
464; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
465; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
466; CI-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
467; CI-NEXT:    v_add_f32_e64 v0, |v0|, v2
468; CI-NEXT:    s_setpc_b64 s[30:31]
469;
470; VI-LABEL: add_select_posk_fabs_f16:
471; VI:       ; %bb.0:
472; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
473; VI-NEXT:    v_mov_b32_e32 v3, 0x3c00
474; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
475; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
476; VI-NEXT:    v_add_f16_e64 v0, |v0|, v2
477; VI-NEXT:    s_setpc_b64 s[30:31]
478;
479; GFX11-LABEL: add_select_posk_fabs_f16:
480; GFX11:       ; %bb.0:
481; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
483; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
484; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
485; GFX11-NEXT:    v_add_f16_e64 v0, |v0|, v2
486; GFX11-NEXT:    s_setpc_b64 s[30:31]
487  %cmp = icmp eq i32 %c, 0
488  %fabs = call half @llvm.fabs.f16(half %x)
489  %select = select i1 %cmp, half 1.0, half %fabs
490  %add = fadd half %select, %y
491  ret half %add
492}
493
; fadd of a select whose two arms are fneg(x) and fneg(y).
; The expected output selects the raw x/y values and folds the negation into
; the arithmetic: the fadd becomes a subtract, z - select(x, y).
494define half @add_select_fneg_fneg_f16(i32 %c, half %x, half %y, half %z) {
495; CI-LABEL: add_select_fneg_fneg_f16:
496; CI:       ; %bb.0:
497; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
499; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
500; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
501; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
502; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
503; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
504; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
505; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
506; CI-NEXT:    v_sub_f32_e32 v0, v3, v0
507; CI-NEXT:    s_setpc_b64 s[30:31]
508;
509; VI-LABEL: add_select_fneg_fneg_f16:
510; VI:       ; %bb.0:
511; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
513; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
514; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
515; VI-NEXT:    s_setpc_b64 s[30:31]
516;
517; GFX11-LABEL: add_select_fneg_fneg_f16:
518; GFX11:       ; %bb.0:
519; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
520; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
521; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
522; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
523; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
524; GFX11-NEXT:    s_setpc_b64 s[30:31]
525  %cmp = icmp eq i32 %c, 0
526  %fneg.x = fneg half %x
527  %fneg.y = fneg half %y
528  %select = select i1 %cmp, half %fneg.x, half %fneg.y
529  %add = fadd half %select, %z
530  ret half %add
531}
532
; fneg/fneg select where fneg(x) (the true arm) has a second arithmetic use:
; add1 = fneg(x) + w.
; The expected output selects the raw x/y and turns both fadds into
; subtracts (z - select, w - x); no explicit negation instruction is expected.
533define { half, half } @add_select_multi_use_lhs_fneg_fneg_f16(i32 %c, half %x, half %y, half %z, half %w) {
534; CI-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
535; CI:       ; %bb.0:
536; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
537; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
538; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
539; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
540; CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
541; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
542; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
543; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
544; CI-NEXT:    v_cvt_f32_f16_e32 v4, v4
545; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
546; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
547; CI-NEXT:    v_sub_f32_e32 v0, v3, v0
548; CI-NEXT:    v_sub_f32_e32 v1, v4, v1
549; CI-NEXT:    s_setpc_b64 s[30:31]
550;
551; VI-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
552; VI:       ; %bb.0:
553; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
555; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
556; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
557; VI-NEXT:    v_sub_f16_e32 v1, v4, v1
558; VI-NEXT:    s_setpc_b64 s[30:31]
559;
560; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_f16:
561; GFX11:       ; %bb.0:
562; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
563; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
564; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
565; GFX11-NEXT:    v_sub_f16_e32 v1, v4, v1
566; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
567; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
568; GFX11-NEXT:    s_setpc_b64 s[30:31]
569  %cmp = icmp eq i32 %c, 0
570  %fneg.x = fneg half %x
571  %fneg.y = fneg half %y
572  %select = select i1 %cmp, half %fneg.x, half %fneg.y
573  %add0 = fadd half %select, %z
574  %add1 = fadd half %fneg.x, %w
575  %insert.0 = insertvalue { half, half } poison, half %add0, 0
576  %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1
577  ret { half, half } %insert.1
578}
579
; fneg/fneg select where fneg(x) is additionally returned as-is in the second
; struct element (despite "store" in the name, the extra use in this function
; is an insertvalue into the return value, not a memory store).
; The expected output materializes fneg(x) for the returned value (XOR with
; 0x8000 on fiji/gfx1100, a -v1 modifier on the f16->f32 convert on hawaii),
; while the select takes the raw x and the fadd becomes a subtract.
580define { half, half } @add_select_multi_store_use_lhs_fneg_fneg_f16(i32 %c, half %x, half %y, half %z) {
581; CI-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
582; CI:       ; %bb.0:
583; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
585; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
586; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
587; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
588; CI-NEXT:    v_cvt_f32_f16_e32 v4, v1
589; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
590; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
591; CI-NEXT:    v_cvt_f32_f16_e64 v1, -v1
592; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v4, vcc
593; CI-NEXT:    v_sub_f32_e32 v0, v3, v0
594; CI-NEXT:    s_setpc_b64 s[30:31]
595;
596; VI-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
597; VI:       ; %bb.0:
598; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
600; VI-NEXT:    v_xor_b32_e32 v4, 0x8000, v1
601; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
602; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
603; VI-NEXT:    v_mov_b32_e32 v1, v4
604; VI-NEXT:    s_setpc_b64 s[30:31]
605;
606; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16:
607; GFX11:       ; %bb.0:
608; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
610; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
611; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
612; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
613; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
614; GFX11-NEXT:    s_setpc_b64 s[30:31]
615  %cmp = icmp eq i32 %c, 0
616  %fneg.x = fneg half %x
617  %fneg.y = fneg half %y
618  %select = select i1 %cmp, half %fneg.x, half %fneg.y
619  %add0 = fadd half %select, %z
620  %insert.0 = insertvalue { half, half } poison, half %add0, 0
621  %insert.1 = insertvalue { half, half } %insert.0, half %fneg.x, 1
622  ret { half, half } %insert.1
623}
624
; fneg/fneg select where fneg(y) (the false arm) has a second arithmetic use:
; add1 = fneg(y) + w.
; The expected output selects the raw x/y and turns both fadds into
; subtracts (z - select, w - y).
625define { half, half } @add_select_multi_use_rhs_fneg_fneg_f16(i32 %c, half %x, half %y, half %z, half %w) {
626; CI-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
627; CI:       ; %bb.0:
628; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
630; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
631; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
632; CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
633; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
634; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
635; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
636; CI-NEXT:    v_cvt_f32_f16_e32 v4, v4
637; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
638; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
639; CI-NEXT:    v_sub_f32_e32 v0, v3, v0
640; CI-NEXT:    v_sub_f32_e32 v1, v4, v2
641; CI-NEXT:    s_setpc_b64 s[30:31]
642;
643; VI-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
644; VI:       ; %bb.0:
645; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
647; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
648; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
649; VI-NEXT:    v_sub_f16_e32 v1, v4, v2
650; VI-NEXT:    s_setpc_b64 s[30:31]
651;
652; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_f16:
653; GFX11:       ; %bb.0:
654; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
655; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
656; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
657; GFX11-NEXT:    v_sub_f16_e32 v1, v4, v2
658; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
659; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
660; GFX11-NEXT:    s_setpc_b64 s[30:31]
661  %cmp = icmp eq i32 %c, 0
662  %fneg.x = fneg half %x
663  %fneg.y = fneg half %y
664  %select = select i1 %cmp, half %fneg.x, half %fneg.y
665  %add0 = fadd half %select, %z
666  %add1 = fadd half %fneg.y, %w
667  %insert.0 = insertvalue { half, half } poison, half %add0, 0
668  %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1
669  ret { half, half } %insert.1
670}
671
; Only the true arm of the select is fneg(x); the false arm is the plain
; variable y, so the negation cannot be moved past the select.
; The expected output negates x before the select (XOR with 0x8000 on
; fiji/gfx1100, a -v1 modifier on the f32->f16 convert on hawaii) and keeps a
; plain add with no source modifier.
672define half @add_select_fneg_var_f16(i32 %c, half %x, half %y, half %z) {
673; CI-LABEL: add_select_fneg_var_f16:
674; CI:       ; %bb.0:
675; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
676; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
677; CI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
678; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
679; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
680; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
681; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
682; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
683; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
684; CI-NEXT:    v_add_f32_e32 v0, v0, v3
685; CI-NEXT:    s_setpc_b64 s[30:31]
686;
687; VI-LABEL: add_select_fneg_var_f16:
688; VI:       ; %bb.0:
689; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690; VI-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
691; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
692; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
693; VI-NEXT:    v_add_f16_e32 v0, v0, v3
694; VI-NEXT:    s_setpc_b64 s[30:31]
695;
696; GFX11-LABEL: add_select_fneg_var_f16:
697; GFX11:       ; %bb.0:
698; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
700; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
701; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
702; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
703; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
704; GFX11-NEXT:    s_setpc_b64 s[30:31]
705  %cmp = icmp eq i32 %c, 0
706  %fneg.x = fneg half %x
707  %select = select i1 %cmp, half %fneg.x, half %y
708  %add = fadd half %select, %z
709  ret half %add
710}
711
; Select between fneg(x) and the negative constant -1.0.
; The expected output pulls the negation out of the select: it selects
; between the raw x and +1.0 (0x3c00 as half) and turns the fadd into a
; subtract, y - select(x, 1.0).
712define half @add_select_fneg_negk_f16(i32 %c, half %x, half %y) {
713; CI-LABEL: add_select_fneg_negk_f16:
714; CI:       ; %bb.0:
715; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
717; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
718; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
719; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
720; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
721; CI-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
722; CI-NEXT:    v_sub_f32_e32 v0, v2, v0
723; CI-NEXT:    s_setpc_b64 s[30:31]
724;
725; VI-LABEL: add_select_fneg_negk_f16:
726; VI:       ; %bb.0:
727; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
728; VI-NEXT:    v_mov_b32_e32 v3, 0x3c00
729; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
730; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
731; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
732; VI-NEXT:    s_setpc_b64 s[30:31]
733;
734; GFX11-LABEL: add_select_fneg_negk_f16:
735; GFX11:       ; %bb.0:
736; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
738; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
739; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
740; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
741; GFX11-NEXT:    s_setpc_b64 s[30:31]
742  %cmp = icmp eq i32 %c, 0
743  %fneg.x = fneg half %x
744  %select = select i1 %cmp, half %fneg.x, half -1.0
745  %add = fadd half %select, %y
746  ret half %add
747}
748
; Select between fneg(x) and the positive constant 0xH3118 (about 0.1592;
; per the test name this is the half encoding of 1/(2*pi)).
; The expected output pulls the negation out of the select: the constant is
; negated instead (0xb118 on fiji/gfx1100, 0xbe230000 as float on hawaii),
; the raw x is selected, and the fadd becomes a subtract, y - select.
749define half @add_select_fneg_inv2pi_f16(i32 %c, half %x, half %y) {
750; CI-LABEL: add_select_fneg_inv2pi_f16:
751; CI:       ; %bb.0:
752; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
753; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
754; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
755; CI-NEXT:    v_mov_b32_e32 v3, 0xbe230000
756; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
757; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
758; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
759; CI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
760; CI-NEXT:    v_sub_f32_e32 v0, v2, v0
761; CI-NEXT:    s_setpc_b64 s[30:31]
762;
763; VI-LABEL: add_select_fneg_inv2pi_f16:
764; VI:       ; %bb.0:
765; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766; VI-NEXT:    v_mov_b32_e32 v3, 0xb118
767; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
768; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
769; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
770; VI-NEXT:    s_setpc_b64 s[30:31]
771;
772; GFX11-LABEL: add_select_fneg_inv2pi_f16:
773; GFX11:       ; %bb.0:
774; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
776; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo
777; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
778; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
779; GFX11-NEXT:    s_setpc_b64 s[30:31]
780  %cmp = icmp eq i32 %c, 0
781  %fneg.x = fneg half %x
782  %select = select i1 %cmp, half %fneg.x, half 0xH3118
783  %add = fadd half %select, %y
784  ret half %add
785}
786
; Computes fadd(select(c == 0, fneg(x), 0xHB118), y). 0xHB118 is 0xH3118 with
; the sign bit set, i.e. roughly -0.1592.
; Expected codegen pulls the fneg out of the select: the constant arm becomes
; the positive value (0x3118 as f16, 0x3e230000 as f32 on CI) and the fadd is
; emitted as the subtract y - select(x, +k).
787define half @add_select_fneg_neginv2pi_f16(i32 %c, half %x, half %y) {
788; CI-LABEL: add_select_fneg_neginv2pi_f16:
789; CI:       ; %bb.0:
790; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
791; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
792; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
793; CI-NEXT:    v_mov_b32_e32 v3, 0x3e230000
794; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
795; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
796; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
797; CI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
798; CI-NEXT:    v_sub_f32_e32 v0, v2, v0
799; CI-NEXT:    s_setpc_b64 s[30:31]
800;
801; VI-LABEL: add_select_fneg_neginv2pi_f16:
802; VI:       ; %bb.0:
803; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804; VI-NEXT:    v_mov_b32_e32 v3, 0x3118
805; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
806; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
807; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
808; VI-NEXT:    s_setpc_b64 s[30:31]
809;
810; GFX11-LABEL: add_select_fneg_neginv2pi_f16:
811; GFX11:       ; %bb.0:
812; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
813; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
814; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo
815; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
816; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
817; GFX11-NEXT:    s_setpc_b64 s[30:31]
818  %cmp = icmp eq i32 %c, 0
819  %fneg.x = fneg half %x
820  %select = select i1 %cmp, half %fneg.x, half 0xHB118
821  %add = fadd half %select, %y
822  ret half %add
823}
824
; Computes fadd(select(c == 0, -2.0, -1.0), x); there is no fneg in the input.
; Expected codegen selects between the two negative constants unchanged
; (inline -1.0 / -2.0 on CI, f16 literals 0xbc00 / 0xc000 on VI and GFX11) and
; keeps a plain add, i.e. no negation is extracted from the constants.
825define half @add_select_negk_negk_f16(i32 %c, half %x) {
826; CI-LABEL: add_select_negk_negk_f16:
827; CI:       ; %bb.0:
828; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
829; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
830; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
831; CI-NEXT:    v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
832; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
833; CI-NEXT:    v_add_f32_e32 v0, v0, v1
834; CI-NEXT:    s_setpc_b64 s[30:31]
835;
836; VI-LABEL: add_select_negk_negk_f16:
837; VI:       ; %bb.0:
838; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839; VI-NEXT:    v_mov_b32_e32 v2, 0xbc00
840; VI-NEXT:    v_mov_b32_e32 v3, 0xc000
841; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
842; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
843; VI-NEXT:    v_add_f16_e32 v0, v0, v1
844; VI-NEXT:    s_setpc_b64 s[30:31]
845;
846; GFX11-LABEL: add_select_negk_negk_f16:
847; GFX11:       ; %bb.0:
848; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
849; GFX11-NEXT:    v_mov_b32_e32 v2, 0xc000
850; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
851; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
852; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
853; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
854; GFX11-NEXT:    s_setpc_b64 s[30:31]
855  %cmp = icmp eq i32 %c, 0
856  %select = select i1 %cmp, half -2.0, half -1.0
857  %add = fadd half %select, %x
858  ret half %add
859}
860
; Computes fadd(select(c == 0, -2048.0, -4096.0), x); there is no fneg in the
; input. Unlike -1.0 / -2.0 in add_select_negk_negk_f16, these constants are
; emitted as literals on every target in the expected output (0xe800 / 0xec00
; as f16, 0xc5000000 / 0xc5800000 as f32 on CI).
; Expected codegen keeps both negative literals and a plain add.
861define half @add_select_negliteralk_negliteralk_f16(i32 %c, half %x) {
862; CI-LABEL: add_select_negliteralk_negliteralk_f16:
863; CI:       ; %bb.0:
864; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
865; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
866; CI-NEXT:    v_mov_b32_e32 v2, 0xc5800000
867; CI-NEXT:    v_mov_b32_e32 v3, 0xc5000000
868; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
869; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
870; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
871; CI-NEXT:    v_add_f32_e32 v0, v0, v1
872; CI-NEXT:    s_setpc_b64 s[30:31]
873;
874; VI-LABEL: add_select_negliteralk_negliteralk_f16:
875; VI:       ; %bb.0:
876; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
877; VI-NEXT:    v_mov_b32_e32 v2, 0xec00
878; VI-NEXT:    v_mov_b32_e32 v3, 0xe800
879; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
880; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
881; VI-NEXT:    v_add_f16_e32 v0, v0, v1
882; VI-NEXT:    s_setpc_b64 s[30:31]
883;
884; GFX11-LABEL: add_select_negliteralk_negliteralk_f16:
885; GFX11:       ; %bb.0:
886; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
887; GFX11-NEXT:    v_mov_b32_e32 v2, 0xe800
888; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
889; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
890; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xec00, v2, vcc_lo
891; GFX11-NEXT:    v_add_f16_e32 v0, v0, v1
892; GFX11-NEXT:    s_setpc_b64 s[30:31]
893  %cmp = icmp eq i32 %c, 0
894  %select = select i1 %cmp, half -2048.0, half -4096.0
895  %add = fadd half %select, %x
896  ret half %add
897}
898
; Computes fadd(fneg(select(c == 0, -2.0, -1.0)), x). Here the fneg is applied
; to the select result (the value is named %fneg.x, but its operand is %select).
; Expected codegen leaves the select on the original negative constants and
; absorbs the fneg by emitting the fadd as the subtract x - select.
899define half @add_select_fneg_negk_negk_f16(i32 %c, half %x) {
900; CI-LABEL: add_select_fneg_negk_negk_f16:
901; CI:       ; %bb.0:
902; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
904; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
905; CI-NEXT:    v_cndmask_b32_e64 v0, -1.0, -2.0, vcc
906; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
907; CI-NEXT:    v_sub_f32_e32 v0, v1, v0
908; CI-NEXT:    s_setpc_b64 s[30:31]
909;
910; VI-LABEL: add_select_fneg_negk_negk_f16:
911; VI:       ; %bb.0:
912; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913; VI-NEXT:    v_mov_b32_e32 v2, 0xbc00
914; VI-NEXT:    v_mov_b32_e32 v3, 0xc000
915; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
916; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v3, vcc
917; VI-NEXT:    v_sub_f16_e32 v0, v1, v0
918; VI-NEXT:    s_setpc_b64 s[30:31]
919;
920; GFX11-LABEL: add_select_fneg_negk_negk_f16:
921; GFX11:       ; %bb.0:
922; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
923; GFX11-NEXT:    v_mov_b32_e32 v2, 0xc000
924; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
925; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
926; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
927; GFX11-NEXT:    v_sub_f16_e32 v0, v1, v0
928; GFX11-NEXT:    s_setpc_b64 s[30:31]
929  %cmp = icmp eq i32 %c, 0
930  %select = select i1 %cmp, half -2.0, half -1.0
931  %fneg.x = fneg half %select
932  %add = fadd half %fneg.x, %x
933  ret half %add
934}
935
; Computes fadd(select(c == 0, -1.0, fneg(x)), y): the constant is the true arm
; and the fneg is the false arm (operands swapped relative to
; add_select_fneg_negk_f16).
; Expected codegen inverts the compare to "ne", selects between +1.0 (0x3c00 as
; f16) and the un-negated x, and emits the fadd as the subtract y - select.
936define half @add_select_negk_fneg_f16(i32 %c, half %x, half %y) {
937; CI-LABEL: add_select_negk_fneg_f16:
938; CI:       ; %bb.0:
939; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
941; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
942; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
943; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
944; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
945; CI-NEXT:    v_cndmask_b32_e32 v0, 1.0, v1, vcc
946; CI-NEXT:    v_sub_f32_e32 v0, v2, v0
947; CI-NEXT:    s_setpc_b64 s[30:31]
948;
949; VI-LABEL: add_select_negk_fneg_f16:
950; VI:       ; %bb.0:
951; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
952; VI-NEXT:    v_mov_b32_e32 v3, 0x3c00
953; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
954; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
955; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
956; VI-NEXT:    s_setpc_b64 s[30:31]
957;
958; GFX11-LABEL: add_select_negk_fneg_f16:
959; GFX11:       ; %bb.0:
960; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
961; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
962; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo
963; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
964; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
965; GFX11-NEXT:    s_setpc_b64 s[30:31]
966  %cmp = icmp eq i32 %c, 0
967  %fneg.x = fneg half %x
968  %select = select i1 %cmp, half -1.0, half %fneg.x
969  %add = fadd half %select, %y
970  ret half %add
971}
972
; Computes fadd(select(c == 0, fneg(x), 1.0), y) with a positive constant arm.
; Expected codegen pulls the fneg out of the select: the constant arm becomes
; -1.0 (inline on CI, literal 0xbc00 on VI and GFX11), x is selected un-negated,
; and the fadd is emitted as the subtract y - select.
973define half @add_select_fneg_posk_f16(i32 %c, half %x, half %y) {
974; CI-LABEL: add_select_fneg_posk_f16:
975; CI:       ; %bb.0:
976; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
978; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
979; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
980; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
981; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
982; CI-NEXT:    v_cndmask_b32_e32 v0, -1.0, v1, vcc
983; CI-NEXT:    v_sub_f32_e32 v0, v2, v0
984; CI-NEXT:    s_setpc_b64 s[30:31]
985;
986; VI-LABEL: add_select_fneg_posk_f16:
987; VI:       ; %bb.0:
988; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
989; VI-NEXT:    v_mov_b32_e32 v3, 0xbc00
990; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
991; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
992; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
993; VI-NEXT:    s_setpc_b64 s[30:31]
994;
995; GFX11-LABEL: add_select_fneg_posk_f16:
996; GFX11:       ; %bb.0:
997; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
999; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
1000; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1001; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
1002; GFX11-NEXT:    s_setpc_b64 s[30:31]
1003  %cmp = icmp eq i32 %c, 0
1004  %fneg.x = fneg half %x
1005  %select = select i1 %cmp, half %fneg.x, half 1.0
1006  %add = fadd half %select, %y
1007  ret half %add
1008}
1009
; Computes fadd(select(c == 0, 1.0, fneg(x)), y): same as
; add_select_fneg_posk_f16 but with the select arms swapped.
; Expected codegen inverts the compare to "ne", selects between -1.0 (literal
; 0xbc00 on VI and GFX11) and the un-negated x, and emits the fadd as the
; subtract y - select.
1010define half @add_select_posk_fneg_f16(i32 %c, half %x, half %y) {
1011; CI-LABEL: add_select_posk_fneg_f16:
1012; CI:       ; %bb.0:
1013; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1015; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1016; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1017; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1018; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1019; CI-NEXT:    v_cndmask_b32_e32 v0, -1.0, v1, vcc
1020; CI-NEXT:    v_sub_f32_e32 v0, v2, v0
1021; CI-NEXT:    s_setpc_b64 s[30:31]
1022;
1023; VI-LABEL: add_select_posk_fneg_f16:
1024; VI:       ; %bb.0:
1025; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1026; VI-NEXT:    v_mov_b32_e32 v3, 0xbc00
1027; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1028; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1029; VI-NEXT:    v_sub_f16_e32 v0, v2, v0
1030; VI-NEXT:    s_setpc_b64 s[30:31]
1031;
1032; GFX11-LABEL: add_select_posk_fneg_f16:
1033; GFX11:       ; %bb.0:
1034; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
1036; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo
1037; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1038; GFX11-NEXT:    v_sub_f16_e32 v0, v2, v0
1039; GFX11-NEXT:    s_setpc_b64 s[30:31]
1040  %cmp = icmp eq i32 %c, 0
1041  %fneg.x = fneg half %x
1042  %select = select i1 %cmp, half 1.0, half %fneg.x
1043  %add = fadd half %select, %y
1044  ret half %add
1045}
1046
; Computes fadd(select(c == 0, -|x|, |y|), z). The negation is written as
; "fsub -0.0, v" rather than the fneg instruction.
; Expected codegen applies the modifiers to each select input separately and
; keeps a plain add: on CI they appear as source modifiers on the f16->f32
; converts (-|v1|, |v2|); on VI and GFX11 as integer bit ops (or 0x8000 sets
; the sign bit, and 0x7fff clears it).
1047define half @add_select_negfabs_fabs_f16(i32 %c, half %x, half %y, half %z) {
1048; CI-LABEL: add_select_negfabs_fabs_f16:
1049; CI:       ; %bb.0:
1050; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1051; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1052; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1053; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1054; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1055; CI-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
1056; CI-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
1057; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1058; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1059; CI-NEXT:    v_add_f32_e32 v0, v0, v3
1060; CI-NEXT:    s_setpc_b64 s[30:31]
1061;
1062; VI-LABEL: add_select_negfabs_fabs_f16:
1063; VI:       ; %bb.0:
1064; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1065; VI-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1066; VI-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
1067; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1068; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1069; VI-NEXT:    v_add_f16_e32 v0, v0, v3
1070; VI-NEXT:    s_setpc_b64 s[30:31]
1071;
1072; GFX11-LABEL: add_select_negfabs_fabs_f16:
1073; GFX11:       ; %bb.0:
1074; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1075; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1076; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
1077; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1078; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1079; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
1080; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
1081; GFX11-NEXT:    s_setpc_b64 s[30:31]
1082  %cmp = icmp eq i32 %c, 0
1083  %fabs.x = call half @llvm.fabs.f16(half %x)
1084  %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
1085  %fabs.y = call half @llvm.fabs.f16(half %y)
1086  %select = select i1 %cmp, half %fneg.fabs.x, half %fabs.y
1087  %add = fadd half %select, %z
1088  ret half %add
1089}
1090
; Computes fadd(select(c == 0, |x|, -|y|), z): the mirror of
; add_select_negfabs_fabs_f16, with the negated fabs on the false arm. The
; negation is written as "fsub -0.0, v".
; Expected codegen applies the modifiers to each select input separately
; (|v1| / -|v2| on the CI converts; and 0x7fff / or 0x8000 on VI and GFX11) and
; keeps a plain add.
1091define half @add_select_fabs_negfabs_f16(i32 %c, half %x, half %y, half %z) {
1092; CI-LABEL: add_select_fabs_negfabs_f16:
1093; CI:       ; %bb.0:
1094; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1095; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1096; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1097; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1098; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1099; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
1100; CI-NEXT:    v_cvt_f32_f16_e64 v2, -|v2|
1101; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1102; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1103; CI-NEXT:    v_add_f32_e32 v0, v0, v3
1104; CI-NEXT:    s_setpc_b64 s[30:31]
1105;
1106; VI-LABEL: add_select_fabs_negfabs_f16:
1107; VI:       ; %bb.0:
1108; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1109; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
1110; VI-NEXT:    v_or_b32_e32 v2, 0x8000, v2
1111; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1112; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1113; VI-NEXT:    v_add_f16_e32 v0, v0, v3
1114; VI-NEXT:    s_setpc_b64 s[30:31]
1115;
1116; GFX11-LABEL: add_select_fabs_negfabs_f16:
1117; GFX11:       ; %bb.0:
1118; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1119; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
1120; GFX11-NEXT:    v_or_b32_e32 v2, 0x8000, v2
1121; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1122; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1123; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
1124; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
1125; GFX11-NEXT:    s_setpc_b64 s[30:31]
1126  %cmp = icmp eq i32 %c, 0
1127  %fabs.x = call half @llvm.fabs.f16(half %x)
1128  %fabs.y = call half @llvm.fabs.f16(half %y)
1129  %fneg.fabs.y = fsub half -0.000000e+00, %fabs.y
1130  %select = select i1 %cmp, half %fabs.x, half %fneg.fabs.y
1131  %add = fadd half %select, %z
1132  ret half %add
1133}
1134
; Computes fadd(select(c == 0, -x, |y|), z): one arm is negated, the other is
; an fabs. The negation is written as "fsub -0.0, x".
; Expected codegen handles each arm separately and keeps a plain add: on CI the
; negate is a source modifier on the f32->f16 convert of x (-v1) and the fabs a
; modifier on the f16->f32 convert of y (|v2|); on VI and GFX11 they are
; xor 0x8000 (flip sign bit) and and 0x7fff (clear sign bit).
1135define half @add_select_neg_fabs_f16(i32 %c, half %x, half %y, half %z) {
1136; CI-LABEL: add_select_neg_fabs_f16:
1137; CI:       ; %bb.0:
1138; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1139; CI-NEXT:    v_cvt_f16_f32_e64 v1, -v1
1140; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1141; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1142; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1143; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1144; CI-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
1145; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1146; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1147; CI-NEXT:    v_add_f32_e32 v0, v0, v3
1148; CI-NEXT:    s_setpc_b64 s[30:31]
1149;
1150; VI-LABEL: add_select_neg_fabs_f16:
1151; VI:       ; %bb.0:
1152; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; VI-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
1154; VI-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
1155; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1156; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1157; VI-NEXT:    v_add_f16_e32 v0, v0, v3
1158; VI-NEXT:    s_setpc_b64 s[30:31]
1159;
1160; GFX11-LABEL: add_select_neg_fabs_f16:
1161; GFX11:       ; %bb.0:
1162; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1163; GFX11-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
1164; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
1165; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1166; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1167; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
1168; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
1169; GFX11-NEXT:    s_setpc_b64 s[30:31]
1170  %cmp = icmp eq i32 %c, 0
1171  %fneg.x = fsub half -0.000000e+00, %x
1172  %fabs.y = call half @llvm.fabs.f16(half %y)
1173  %select = select i1 %cmp, half %fneg.x, half %fabs.y
1174  %add = fadd half %select, %z
1175  ret half %add
1176}
1177
; Computes fadd(select(c == 0, |x|, -y), z): the mirror of
; add_select_neg_fabs_f16, with the fabs on the true arm and the negate on the
; false arm. The negation is written as "fsub -0.0, y".
; Expected codegen handles each arm separately (|v1| and -v2 convert modifiers
; on CI; and 0x7fff / xor 0x8000 on VI and GFX11) and keeps a plain add.
1178define half @add_select_fabs_neg_f16(i32 %c, half %x, half %y, half %z) {
1179; CI-LABEL: add_select_fabs_neg_f16:
1180; CI:       ; %bb.0:
1181; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1182; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1183; CI-NEXT:    v_cvt_f16_f32_e64 v2, -v2
1184; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1185; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1186; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
1187; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1188; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1189; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1190; CI-NEXT:    v_add_f32_e32 v0, v0, v3
1191; CI-NEXT:    s_setpc_b64 s[30:31]
1192;
1193; VI-LABEL: add_select_fabs_neg_f16:
1194; VI:       ; %bb.0:
1195; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1196; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
1197; VI-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
1198; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1199; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1200; VI-NEXT:    v_add_f16_e32 v0, v0, v3
1201; VI-NEXT:    s_setpc_b64 s[30:31]
1202;
1203; GFX11-LABEL: add_select_fabs_neg_f16:
1204; GFX11:       ; %bb.0:
1205; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
1207; GFX11-NEXT:    v_xor_b32_e32 v2, 0x8000, v2
1208; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1209; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1210; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
1211; GFX11-NEXT:    v_add_f16_e32 v0, v0, v3
1212; GFX11-NEXT:    s_setpc_b64 s[30:31]
1213  %cmp = icmp eq i32 %c, 0
1214  %fabs.x = call half @llvm.fabs.f16(half %x)
1215  %fneg.y = fsub half -0.000000e+00, %y
1216  %select = select i1 %cmp, half %fabs.x, half %fneg.y
1217  %add = fadd half %select, %z
1218  ret half %add
1219}
1220
; Computes fadd(select(c == 0, -x, -|y|), z): both select arms are negated
; (written as "fsub -0.0, v").
; Expected codegen pulls the common negation out of the select: it selects
; between plain x and |y| (|v2| convert modifier on CI, and 0x7fff on VI and
; GFX11) and emits the fadd as the subtract z - select.
1221define half @add_select_neg_negfabs_f16(i32 %c, half %x, half %y, half %z) {
1222; CI-LABEL: add_select_neg_negfabs_f16:
1223; CI:       ; %bb.0:
1224; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1225; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1226; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1227; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1228; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1229; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1230; CI-NEXT:    v_cvt_f32_f16_e64 v2, |v2|
1231; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1232; CI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1233; CI-NEXT:    v_sub_f32_e32 v0, v3, v0
1234; CI-NEXT:    s_setpc_b64 s[30:31]
1235;
1236; VI-LABEL: add_select_neg_negfabs_f16:
1237; VI:       ; %bb.0:
1238; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239; VI-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
1240; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1241; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1242; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
1243; VI-NEXT:    s_setpc_b64 s[30:31]
1244;
1245; GFX11-LABEL: add_select_neg_negfabs_f16:
1246; GFX11:       ; %bb.0:
1247; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248; GFX11-NEXT:    v_and_b32_e32 v2, 0x7fff, v2
1249; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1250; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1251; GFX11-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc_lo
1252; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
1253; GFX11-NEXT:    s_setpc_b64 s[30:31]
1254  %cmp = icmp eq i32 %c, 0
1255  %fneg.x = fsub half -0.000000e+00, %x
1256  %fabs.y = call half @llvm.fabs.f16(half %y)
1257  %fneg.fabs.y = fsub half -0.000000e+00, %fabs.y
1258  %select = select i1 %cmp, half %fneg.x, half %fneg.fabs.y
1259  %add = fadd half %select, %z
1260  ret half %add
1261}
1262
; Computes fadd(select(c == 0, -y, -|x|), z): both select arms are negated
; (written as "fsub -0.0, v"). Note the true arm is the negated y, the false
; arm is the negated fabs of x.
; Expected codegen pulls the common negation out of the select: it selects
; between plain y and |x| (|v1| convert modifier on CI, and 0x7fff on VI and
; GFX11) and emits the fadd as the subtract z - select.
1263define half @add_select_negfabs_neg_f16(i32 %c, half %x, half %y, half %z) {
1264; CI-LABEL: add_select_negfabs_neg_f16:
1265; CI:       ; %bb.0:
1266; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1267; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1268; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1269; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1270; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1271; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1272; CI-NEXT:    v_cvt_f32_f16_e64 v1, |v1|
1273; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1274; CI-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
1275; CI-NEXT:    v_sub_f32_e32 v0, v3, v0
1276; CI-NEXT:    s_setpc_b64 s[30:31]
1277;
1278; VI-LABEL: add_select_negfabs_neg_f16:
1279; VI:       ; %bb.0:
1280; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1281; VI-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
1282; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1283; VI-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc
1284; VI-NEXT:    v_sub_f16_e32 v0, v3, v0
1285; VI-NEXT:    s_setpc_b64 s[30:31]
1286;
1287; GFX11-LABEL: add_select_negfabs_neg_f16:
1288; GFX11:       ; %bb.0:
1289; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1290; GFX11-NEXT:    v_and_b32_e32 v1, 0x7fff, v1
1291; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1292; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1293; GFX11-NEXT:    v_cndmask_b32_e32 v0, v1, v2, vcc_lo
1294; GFX11-NEXT:    v_sub_f16_e32 v0, v3, v0
1295; GFX11-NEXT:    s_setpc_b64 s[30:31]
1296  %cmp = icmp eq i32 %c, 0
1297  %fabs.x = call half @llvm.fabs.f16(half %x)
1298  %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
1299  %fneg.y = fsub half -0.000000e+00, %y
1300  %select = select i1 %cmp, half %fneg.y, half %fneg.fabs.x
1301  %add = fadd half %select, %z
1302  ret half %add
1303}
1304
; Computes fmul(select(c == 0, -|x|, 4.0), y). The result value is named %add
; even though it is produced by an fmul.
; Expected codegen keeps the -|x| on the select input (-|v1| convert modifier
; on CI, or 0x8000 on VI and GFX11), leaves the constant as +4.0 (literal
; 0x4400 as f16) and emits a plain multiply with no modifier on the select
; result.
1305define half @mul_select_negfabs_posk_f16(i32 %c, half %x, half %y) {
1306; CI-LABEL: mul_select_negfabs_posk_f16:
1307; CI:       ; %bb.0:
1308; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1310; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1311; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1312; CI-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
1313; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1314; CI-NEXT:    v_cndmask_b32_e32 v0, 4.0, v1, vcc
1315; CI-NEXT:    v_mul_f32_e32 v0, v0, v2
1316; CI-NEXT:    s_setpc_b64 s[30:31]
1317;
1318; VI-LABEL: mul_select_negfabs_posk_f16:
1319; VI:       ; %bb.0:
1320; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1321; VI-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1322; VI-NEXT:    v_mov_b32_e32 v3, 0x4400
1323; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1324; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1325; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
1326; VI-NEXT:    s_setpc_b64 s[30:31]
1327;
1328; GFX11-LABEL: mul_select_negfabs_posk_f16:
1329; GFX11:       ; %bb.0:
1330; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1331; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1332; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1333; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1334; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
1335; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
1336; GFX11-NEXT:    s_setpc_b64 s[30:31]
1337  %cmp = icmp eq i32 %c, 0
1338  %fabs.x = call half @llvm.fabs.f16(half %x)
1339  %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
1340  %select = select i1 %cmp, half %fneg.fabs.x, half 4.0
1341  %add = fmul half %select, %y
1342  ret half %add
1343}
1344
; Computes fmul(select(c == 0, 4.0, -|x|), y): same as
; mul_select_negfabs_posk_f16 with the select arms swapped. The result value is
; named %add even though it is produced by an fmul.
; Expected codegen inverts the compare to "ne", keeps -|x| on the select input
; and +4.0 (0x4400 as f16) as the constant, and emits a plain multiply.
1345define half @mul_select_posk_negfabs_f16(i32 %c, half %x, half %y) {
1346; CI-LABEL: mul_select_posk_negfabs_f16:
1347; CI:       ; %bb.0:
1348; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1349; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1350; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1351; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1352; CI-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
1353; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1354; CI-NEXT:    v_cndmask_b32_e32 v0, 4.0, v1, vcc
1355; CI-NEXT:    v_mul_f32_e32 v0, v0, v2
1356; CI-NEXT:    s_setpc_b64 s[30:31]
1357;
1358; VI-LABEL: mul_select_posk_negfabs_f16:
1359; VI:       ; %bb.0:
1360; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361; VI-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1362; VI-NEXT:    v_mov_b32_e32 v3, 0x4400
1363; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1364; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1365; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
1366; VI-NEXT:    s_setpc_b64 s[30:31]
1367;
1368; GFX11-LABEL: mul_select_posk_negfabs_f16:
1369; GFX11:       ; %bb.0:
1370; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1372; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
1373; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1374; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo
1375; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
1376; GFX11-NEXT:    s_setpc_b64 s[30:31]
1377  %cmp = icmp eq i32 %c, 0
1378  %fabs.x = call half @llvm.fabs.f16(half %x)
1379  %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
1380  %select = select i1 %cmp, half 4.0, half %fneg.fabs.x
1381  %add = fmul half %select, %y
1382  ret half %add
1383}
1384
; Computes fmul(select(c == 0, -|x|, -4.0), y) with a negative constant arm.
; The result value is named %add even though it is produced by an fmul.
; Expected codegen keeps -|x| on the select input (-|v1| convert modifier on
; CI, or 0x8000 on VI and GFX11), keeps the constant as -4.0 (literal 0xc400 as
; f16) and emits a plain multiply; the shared negation is not extracted.
1385define half @mul_select_negfabs_negk_f16(i32 %c, half %x, half %y) {
1386; CI-LABEL: mul_select_negfabs_negk_f16:
1387; CI:       ; %bb.0:
1388; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1389; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1390; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1391; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1392; CI-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
1393; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1394; CI-NEXT:    v_cndmask_b32_e32 v0, -4.0, v1, vcc
1395; CI-NEXT:    v_mul_f32_e32 v0, v0, v2
1396; CI-NEXT:    s_setpc_b64 s[30:31]
1397;
1398; VI-LABEL: mul_select_negfabs_negk_f16:
1399; VI:       ; %bb.0:
1400; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1401; VI-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1402; VI-NEXT:    v_mov_b32_e32 v3, 0xc400
1403; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1404; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1405; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
1406; VI-NEXT:    s_setpc_b64 s[30:31]
1407;
1408; GFX11-LABEL: mul_select_negfabs_negk_f16:
1409; GFX11:       ; %bb.0:
1410; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1411; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1412; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1413; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1414; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
1415; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
1416; GFX11-NEXT:    s_setpc_b64 s[30:31]
1417  %cmp = icmp eq i32 %c, 0
1418  %fabs.x = call half @llvm.fabs.f16(half %x)
1419  %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
1420  %select = select i1 %cmp, half %fneg.fabs.x, half -4.0
1421  %add = fmul half %select, %y
1422  ret half %add
1423}
1424
; Computes fmul(select(c == 0, -4.0, -|x|), y): same as
; mul_select_negfabs_negk_f16 with the select arms swapped. The result value is
; named %add even though it is produced by an fmul.
; Expected codegen inverts the compare to "ne", keeps -|x| on the select input
; and -4.0 (0xc400 as f16) as the constant, and emits a plain multiply.
1425define half @mul_select_negk_negfabs_f16(i32 %c, half %x, half %y) {
1426; CI-LABEL: mul_select_negk_negfabs_f16:
1427; CI:       ; %bb.0:
1428; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1429; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1430; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1431; CI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1432; CI-NEXT:    v_cvt_f32_f16_e64 v1, -|v1|
1433; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1434; CI-NEXT:    v_cndmask_b32_e32 v0, -4.0, v1, vcc
1435; CI-NEXT:    v_mul_f32_e32 v0, v0, v2
1436; CI-NEXT:    s_setpc_b64 s[30:31]
1437;
1438; VI-LABEL: mul_select_negk_negfabs_f16:
1439; VI:       ; %bb.0:
1440; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1441; VI-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1442; VI-NEXT:    v_mov_b32_e32 v3, 0xc400
1443; VI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
1444; VI-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
1445; VI-NEXT:    v_mul_f16_e32 v0, v0, v2
1446; VI-NEXT:    s_setpc_b64 s[30:31]
1447;
1448; GFX11-LABEL: mul_select_negk_negfabs_f16:
1449; GFX11:       ; %bb.0:
1450; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1451; GFX11-NEXT:    v_or_b32_e32 v1, 0x8000, v1
1452; GFX11-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
1453; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1454; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo
1455; GFX11-NEXT:    v_mul_f16_e32 v0, v0, v2
1456; GFX11-NEXT:    s_setpc_b64 s[30:31]
1457  %cmp = icmp eq i32 %c, 0
1458  %fabs.x = call half @llvm.fabs.f16(half %x)
1459  %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x
1460  %select = select i1 %cmp, half -4.0, half %fneg.fabs.x
1461  %add = fmul half %select, %y
1462  ret half %add
1463}
1464
1465; --------------------------------------------------------------------------------
1466; Don't fold if fneg can fold into the source
1467; --------------------------------------------------------------------------------
1468
; Returns select(c == 0, fneg(x + 4.0), 2.0). The %y argument is unused.
; This function is checked with the per-mode prefixes (-SAFE for the default
; RUN lines, -NSZ for the -enable-no-signed-zeros-fp-math RUN lines) because
; the expected output differs between the two.
; Default mode keeps the add of 4.0 followed by a separate negate (a -v1 source
; modifier on the cndmask on CI, xor 0x8000 on VI and GFX11).
; NSZ mode folds the negate into the arithmetic, emitting the subtract
; (-4.0 - x) and selecting its result directly against 2.0.
1469define half @select_fneg_posk_src_add_f16(i32 %c, half %x, half %y) {
1470; CI-SAFE-LABEL: select_fneg_posk_src_add_f16:
1471; CI-SAFE:       ; %bb.0:
1472; CI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1473; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
1474; CI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1475; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
1476; CI-SAFE-NEXT:    v_add_f32_e32 v1, 4.0, v1
1477; CI-SAFE-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v1, vcc
1478; CI-SAFE-NEXT:    s_setpc_b64 s[30:31]
1479;
1480; VI-SAFE-LABEL: select_fneg_posk_src_add_f16:
1481; VI-SAFE:       ; %bb.0:
1482; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1483; VI-SAFE-NEXT:    v_add_f16_e32 v1, 4.0, v1
1484; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
1485; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
1486; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1487; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1488; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
1489;
1490; GFX11-SAFE-LABEL: select_fneg_posk_src_add_f16:
1491; GFX11-SAFE:       ; %bb.0:
1492; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1493; GFX11-SAFE-NEXT:    v_add_f16_e32 v1, 4.0, v1
1494; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1495; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1496; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
1497; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
1498; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
1499;
1500; CI-NSZ-LABEL: select_fneg_posk_src_add_f16:
1501; CI-NSZ:       ; %bb.0:
1502; CI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
1504; CI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1505; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
1506; CI-NSZ-NEXT:    v_sub_f32_e32 v1, -4.0, v1
1507; CI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 2.0, v1, vcc
1508; CI-NSZ-NEXT:    s_setpc_b64 s[30:31]
1509;
1510; VI-NSZ-LABEL: select_fneg_posk_src_add_f16:
1511; VI-NSZ:       ; %bb.0:
1512; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1513; VI-NSZ-NEXT:    v_sub_f16_e32 v1, -4.0, v1
1514; VI-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
1515; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1516; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
1517; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
1518;
1519; GFX11-NSZ-LABEL: select_fneg_posk_src_add_f16:
1520; GFX11-NSZ:       ; %bb.0:
1521; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1522; GFX11-NSZ-NEXT:    v_sub_f16_e32 v1, -4.0, v1
1523; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1524; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
1525; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
1526; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
1527  %cmp = icmp eq i32 %c, 0
1528  %add = fadd half %x, 4.0
1529  %fneg = fneg half %add
1530  %select = select i1 %cmp, half %fneg, half 2.0
1531  ret half %select
1532}
1533
; Selects between -(x - 4.0) and the constant 2.0, keyed on c == 0.
; The default-mode expectations emit the subtraction as an add of -4.0 and then
; flip the sign separately (a negated source operand on the select, or an xor
; with 0x8000). The no-signed-zeros expectations fold the negation into a
; single subtract of x from 4.0.
define half @select_fneg_posk_src_sub_f16(i32 %c, half %x) {
; CI-SAFE-LABEL: select_fneg_posk_src_sub_f16:
; CI-SAFE:       ; %bb.0:
; CI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-SAFE-NEXT:    v_add_f32_e32 v1, -4.0, v1
; CI-SAFE-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v1, vcc
; CI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_sub_f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_add_f16_e32 v1, -4.0, v1
; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_add_f16_e32 v1, -4.0, v1
; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; CI-NSZ-LABEL: select_fneg_posk_src_sub_f16:
; CI-NSZ:       ; %bb.0:
; CI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-NSZ-NEXT:    v_sub_f32_e32 v1, 4.0, v1
; CI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NSZ-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NSZ-LABEL: select_fneg_posk_src_sub_f16:
; VI-NSZ:       ; %bb.0:
; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NSZ-NEXT:    v_sub_f16_e32 v1, 4.0, v1
; VI-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_f16:
; GFX11-NSZ:       ; %bb.0:
; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-NEXT:    v_sub_f16_e32 v1, 4.0, v1
; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %c, 0
  ; Named after the operation it holds (it is an fsub, not an fadd).
  %sub = fsub half %x, 4.0
  %fneg = fneg half %sub
  %select = select i1 %cmp, half %fneg, half 2.0
  ret half %select
}
1598
; Selects between -(x * 4.0) and the constant 2.0, keyed on c == 0.
; One set of expectations per target covers both run configurations. In all of
; them the negation appears only as the -4.0 multiplier, with no separate sign
; flip.
define half @select_fneg_posk_src_mul_f16(i32 %c, half %x) {
; CI-LABEL: select_fneg_posk_src_mul_f16:
; CI:       ; %bb.0:
; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-NEXT:    v_mul_f32_e32 v1, -4.0, v1
; CI-NEXT:    v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: select_fneg_posk_src_mul_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v1, -4.0, v1
; VI-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: select_fneg_posk_src_mul_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mul_f16_e32 v1, -4.0, v1
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %is.zero = icmp eq i32 %c, 0
  %scaled = fmul half %x, 4.0
  %negated = fneg half %scaled
  %result = select i1 %is.zero, half %negated, half 2.0
  ret half %result
}
1633
; Selects between -fma(x, 4.0, z) and the constant 2.0, keyed on c == 0.
; The default-mode expectations compute the fused multiply-add with the original
; operands and negate the result afterwards. The no-signed-zeros expectations
; instead negate the constant and the addend inside the fma (-4.0 and -z).
define half @select_fneg_posk_src_fma_f16(i32 %c, half %x, half %z) {
; CI-SAFE-LABEL: select_fneg_posk_src_fma_f16:
; CI-SAFE:       ; %bb.0:
; CI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-SAFE-NEXT:    v_fma_f32 v1, v1, 4.0, v2
; CI-SAFE-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v1, vcc
; CI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_fma_f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_fma_f16 v1, v1, 4.0, v2
; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, 4.0, v1
; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v2
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; CI-NSZ-LABEL: select_fneg_posk_src_fma_f16:
; CI-NSZ:       ; %bb.0:
; CI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-NSZ-NEXT:    v_fma_f32 v1, v1, -4.0, -v2
; CI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NSZ-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NSZ-LABEL: select_fneg_posk_src_fma_f16:
; VI-NSZ:       ; %bb.0:
; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NSZ-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
; VI-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_f16:
; GFX11-NSZ:       ; %bb.0:
; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
  %is.zero = icmp eq i32 %c, 0
  %fused = call half @llvm.fma.f16(half %x, half 4.0, half %z)
  %negated = fneg half %fused
  %result = select i1 %is.zero, half %negated, half 2.0
  ret half %result
}
1702
; Selects between -fmuladd(x, 4.0, z) and the constant 2.0, keyed on c == 0.
; The hawaii expectations split the fmuladd into a separate multiply and
; add (or subtract, under no-signed-zeros); the other two targets emit a fused
; f16 multiply-add. As in the fma test, the default mode negates the result
; afterwards and no-signed-zeros pushes the negation onto the operands.
define half @select_fneg_posk_src_fmad_f16(i32 %c, half %x, half %z) {
; CI-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; CI-SAFE:       ; %bb.0:
; CI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-SAFE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-SAFE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; CI-SAFE-NEXT:    v_mul_f32_e32 v1, 4.0, v1
; CI-SAFE-NEXT:    v_add_f32_e32 v1, v1, v2
; CI-SAFE-NEXT:    v_cndmask_b32_e64 v0, 2.0, -v1, vcc
; CI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; VI-SAFE:       ; %bb.0:
; VI-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT:    v_fma_f16 v1, v1, 4.0, v2
; VI-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
; VI-SAFE-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-SAFE-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; GFX11-SAFE:       ; %bb.0:
; GFX11-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT:    v_fmac_f16_e32 v2, 4.0, v1
; GFX11-SAFE-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT:    v_xor_b32_e32 v1, 0x8000, v2
; GFX11-SAFE-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; CI-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; CI-NSZ:       ; %bb.0:
; CI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, v1
; CI-NSZ-NEXT:    v_cvt_f16_f32_e32 v2, v2
; CI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
; CI-NSZ-NEXT:    v_cvt_f32_f16_e32 v2, v2
; CI-NSZ-NEXT:    v_mul_f32_e32 v1, -4.0, v1
; CI-NSZ-NEXT:    v_sub_f32_e32 v1, v1, v2
; CI-NSZ-NEXT:    v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NSZ-NEXT:    s_setpc_b64 s[30:31]
;
; VI-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; VI-NSZ:       ; %bb.0:
; VI-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NSZ-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
; VI-NSZ-NEXT:    v_mov_b32_e32 v2, 0x4000
; VI-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NSZ-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; GFX11-NSZ:       ; %bb.0:
; GFX11-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-NEXT:    v_fma_f16 v1, v1, -4.0, -v2
; GFX11-NSZ-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NSZ-NEXT:    v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-NSZ-NEXT:    s_setpc_b64 s[30:31]
  %is.zero = icmp eq i32 %c, 0
  %muladd = call half @llvm.fmuladd.f16(half %x, half 4.0, half %z)
  %negated = fneg half %muladd
  %result = select i1 %is.zero, half %negated, half 2.0
  ret half %result
}
1773
; Half-precision intrinsic declarations; all three reference attribute group #0.
declare half @llvm.fabs.f16(half) #0
declare half @llvm.fma.f16(half, half, half) #0
declare half @llvm.fmuladd.f16(half, half, half) #0

; Attribute group applied to the intrinsic declarations above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
1779