xref: /llvm-project/llvm/test/CodeGen/AMDGPU/mad-mix.ll (revision ca955197047ce044dec1e85fd401b1788550602d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s
6; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
7; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
8
9; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
10; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
11; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
12; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s
13; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
14; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
15
; Baseline test. All three half arguments are widened to float with fpext and
; passed to llvm.fmuladd.f32. The assertions below expect a single
; v_fma_mix_f32 (gfx1100 and gfx906 runs) or v_mad_mix_f32 (gfx900 runs) with
; op_sel_hi:[1,1,1]. The gfx9-generic and fiji runs instead expect three
; v_cvt_f32_f16 conversions followed by v_mac_f32. On hawaii the SelectionDAG
; run expects a bare v_mad_f32 while the GlobalISel run expects explicit
; conversions, so those two runs are checked under separate prefixes.
16define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
17; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
18; GFX1100:       ; %bb.0:
19; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
21; GFX1100-NEXT:    s_setpc_b64 s[30:31]
22;
23; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
24; GFX900:       ; %bb.0:
25; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
27; GFX900-NEXT:    s_setpc_b64 s[30:31]
28;
29; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
30; GFX906:       ; %bb.0:
31; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
33; GFX906-NEXT:    s_setpc_b64 s[30:31]
34;
35; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
36; GFX9GEN:       ; %bb.0:
37; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v0
39; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
40; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
41; GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
42; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
43;
44; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
45; VI:       ; %bb.0:
46; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; VI-NEXT:    v_cvt_f32_f16_e32 v3, v0
48; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
49; VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
50; VI-NEXT:    v_mac_f32_e32 v0, v3, v1
51; VI-NEXT:    s_setpc_b64 s[30:31]
52;
53; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
54; SDAG-CI:       ; %bb.0:
55; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
57; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
58;
59; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
60; GISEL-CI:       ; %bb.0:
61; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
63; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
64; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
65; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
66; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Widen every operand to float, then multiply-add in float.
67  %src0.ext = fpext half %src0 to float
68  %src1.ext = fpext half %src1 to float
69  %src2.ext = fpext half %src2 to float
70  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
71  ret float %result
72}
73
; High-half operands taken from integers. Each i32 argument is shifted right
; by 16, truncated to i16 and bitcast to half before the fpext, so the half
; value comes from the upper 16 bits of the argument. The assertions expect
; the mix instruction to carry op_sel:[1,1,1] op_sel_hi:[1,1,1] on the
; gfx1100, gfx900 and gfx906 runs. The gfx9-generic and fiji runs expect SDWA
; conversions with src0_sel:WORD_1, and the hawaii runs expect an explicit
; v_lshrrev_b32 by 16 before each conversion (same output for both
; instruction selectors, hence the shared prefix).
74define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src2) #0 {
75; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
76; GFX1100:       ; %bb.0:
77; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
79; GFX1100-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
82; GFX900:       ; %bb.0:
83; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
85; GFX900-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
88; GFX906:       ; %bb.0:
89; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
91; GFX906-NEXT:    s_setpc_b64 s[30:31]
92;
93; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
94; GFX9GEN:       ; %bb.0:
95; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
97; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
98; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
99; GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
100; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
101;
102; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
103; VI:       ; %bb.0:
104; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
106; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
107; VI-NEXT:    v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
108; VI-NEXT:    v_mac_f32_e32 v0, v3, v1
109; VI-NEXT:    s_setpc_b64 s[30:31]
110;
111; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
112; CI:       ; %bb.0:
113; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
115; CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
116; CI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
117; CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
118; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
119; CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
120; CI-NEXT:    v_mac_f32_e32 v0, v3, v1
121; CI-NEXT:    s_setpc_b64 s[30:31]
; Move the upper 16 bits of each argument into the low 16 bits.
122  %src0.hi = lshr i32 %src0, 16
123  %src1.hi = lshr i32 %src1, 16
124  %src2.hi = lshr i32 %src2, 16
; Narrow to 16 bits and reinterpret those bits as half.
125  %src0.i16 = trunc i32 %src0.hi to i16
126  %src1.i16 = trunc i32 %src1.hi to i16
127  %src2.i16 = trunc i32 %src2.hi to i16
128  %src0.fp16 = bitcast i16 %src0.i16 to half
129  %src1.fp16 = bitcast i16 %src1.i16 to half
130  %src2.fp16 = bitcast i16 %src2.i16 to half
; Widen to float and multiply-add in float.
131  %src0.ext = fpext half %src0.fp16 to float
132  %src1.ext = fpext half %src1.fp16 to float
133  %src2.ext = fpext half %src2.fp16 to float
134  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
135  ret float %result
136}
137
; High-half operands taken from vectors. Element 1 of each <2 x half>
; argument is extracted, widened to float and passed to llvm.fmuladd.f32.
; The assertions for the gfx1100, gfx900, gfx906, gfx9-generic and fiji runs
; are the same instruction sequences as in the integer-based high-half test
; (op_sel:[1,1,1] on the mix instruction, or SDWA conversions with
; src0_sel:WORD_1). On hawaii the operands are read from v1, v3 and v5; the
; SelectionDAG run expects a bare v_mad_f32 while the GlobalISel run expects
; conversions followed by v_mac_f32.
138define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
139; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
140; GFX1100:       ; %bb.0:
141; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
143; GFX1100-NEXT:    s_setpc_b64 s[30:31]
144;
145; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
146; GFX900:       ; %bb.0:
147; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
148; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
149; GFX900-NEXT:    s_setpc_b64 s[30:31]
150;
151; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
152; GFX906:       ; %bb.0:
153; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
155; GFX906-NEXT:    s_setpc_b64 s[30:31]
156;
157; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
158; GFX9GEN:       ; %bb.0:
159; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
161; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
162; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
163; GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
164; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
165;
166; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
167; VI:       ; %bb.0:
168; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
169; VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
170; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
171; VI-NEXT:    v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
172; VI-NEXT:    v_mac_f32_e32 v0, v3, v1
173; VI-NEXT:    s_setpc_b64 s[30:31]
174;
175; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
176; SDAG-CI:       ; %bb.0:
177; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; SDAG-CI-NEXT:    v_mad_f32 v0, v1, v3, v5
179; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
180;
181; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
182; GISEL-CI:       ; %bb.0:
183; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
185; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v3
186; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v5
187; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v1, v2
188; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Pick element 1 of each vector, widen it to float, then multiply-add.
189  %src0.hi = extractelement <2 x half> %src0, i32 1
190  %src1.hi = extractelement <2 x half> %src1, i32 1
191  %src2.hi = extractelement <2 x half> %src2, i32 1
192  %src0.ext = fpext half %src0.hi to float
193  %src1.ext = fpext half %src1.hi to float
194  %src2.ext = fpext half %src2.hi to float
195  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
196  ret float %result
197}
198
; Two-element vector case. The three <2 x half> arguments are widened to
; <2 x float> as whole vectors and passed to llvm.fmuladd.v2f32. On the
; gfx1100, gfx900 and gfx906 runs the assertions expect two mix instructions,
; one with only op_sel_hi:[1,1,1] and one that additionally has
; op_sel:[1,1,1], plus a v_mov_b32 to place the result. The SelectionDAG and
; GlobalISel runs emit those two instructions in opposite order on gfx900 and
; gfx906, so they are checked under selector-specific prefixes; the gfx1100
; output is shared. The gfx9-generic, fiji and hawaii runs expect per-element
; conversions followed by two v_mac_f32.
199define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
200; GFX1100-LABEL: v_mad_mix_v2f32:
201; GFX1100:       ; %bb.0:
202; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX1100-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
204; GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
205; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2)
206; GFX1100-NEXT:    v_mov_b32_e32 v0, v3
207; GFX1100-NEXT:    s_setpc_b64 s[30:31]
208;
209; SDAG-GFX900-LABEL: v_mad_mix_v2f32:
210; SDAG-GFX900:       ; %bb.0:
211; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; SDAG-GFX900-NEXT:    v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
213; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
214; SDAG-GFX900-NEXT:    v_mov_b32_e32 v1, v3
215; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
216;
217; SDAG-GFX906-LABEL: v_mad_mix_v2f32:
218; SDAG-GFX906:       ; %bb.0:
219; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
220; SDAG-GFX906-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
221; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
222; SDAG-GFX906-NEXT:    v_mov_b32_e32 v1, v3
223; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
224;
225; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
226; SDAG-GFX9GEN:       ; %bb.0:
227; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
229; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v4, v0
230; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
231; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v6, v1
232; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
233; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
234; SDAG-GFX9GEN-NEXT:    v_mac_f32_e32 v1, v3, v5
235; SDAG-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v4, v6
236; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
237;
238; SDAG-VI-LABEL: v_mad_mix_v2f32:
239; SDAG-VI:       ; %bb.0:
240; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
242; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v4, v0
243; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
244; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v6, v1
245; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
246; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
247; SDAG-VI-NEXT:    v_mac_f32_e32 v1, v3, v5
248; SDAG-VI-NEXT:    v_mac_f32_e32 v0, v4, v6
249; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
250;
251; SDAG-CI-LABEL: v_mad_mix_v2f32:
252; SDAG-CI:       ; %bb.0:
253; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v5, v5
255; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v6, v3
256; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
257; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v4, v4
258; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v3, v5
259; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v5, v6
260; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
261; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v6, v0
262; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
263; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v4
264; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
265; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v4, v6
266; SDAG-CI-NEXT:    v_mac_f32_e32 v3, v1, v5
267; SDAG-CI-NEXT:    v_mov_b32_e32 v1, v3
268; SDAG-CI-NEXT:    v_mac_f32_e32 v0, v4, v2
269; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
270;
271; GISEL-GFX900-LABEL: v_mad_mix_v2f32:
272; GISEL-GFX900:       ; %bb.0:
273; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GISEL-GFX900-NEXT:    v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
275; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
276; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v3
277; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
278;
279; GISEL-GFX906-LABEL: v_mad_mix_v2f32:
280; GISEL-GFX906:       ; %bb.0:
281; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
282; GISEL-GFX906-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1]
283; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
284; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v3
285; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
286;
287; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
288; GISEL-GFX9GEN:       ; %bb.0:
289; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v0
291; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
292; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v5, v1
293; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
294; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
295; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
296; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v5
297; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v1, v4, v6
298; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
299;
300; GISEL-VI-LABEL: v_mad_mix_v2f32:
301; GISEL-VI:       ; %bb.0:
302; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
303; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v3, v0
304; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
305; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v5, v1
306; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
307; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
308; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
309; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v5
310; GISEL-VI-NEXT:    v_mac_f32_e32 v1, v4, v6
311; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
312;
313; GISEL-CI-LABEL: v_mad_mix_v2f32:
314; GISEL-CI:       ; %bb.0:
315; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
316; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v6, v0
317; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v7, v1
318; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
319; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
320; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v4
321; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v5
322; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v6, v2
323; GISEL-CI-NEXT:    v_mac_f32_e32 v1, v7, v3
324; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Widen whole vectors, then use the vector form of fmuladd.
325  %src0.ext = fpext <2 x half> %src0 to <2 x float>
326  %src1.ext = fpext <2 x half> %src1 to <2 x float>
327  %src2.ext = fpext <2 x half> %src2 to <2 x float>
328  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
329  ret <2 x float> %result
330}
331
; Vector case with per-operand element shuffles applied before the fpext.
; src0 is swapped (mask <1, 0>), src1 keeps its order (mask <0, 1>) and src2
; repeats element 1 in both lanes (mask <1, 1>). On the gfx1100, gfx900 and
; gfx906 runs the assertions expect two mix instructions whose op_sel fields
; are [1,0,1] and [0,1,1], with no separate shuffle instructions. The
; gfx9-generic and fiji runs expect conversions followed by v_mad_f32 and
; v_mac_f32; the hawaii runs differ between SelectionDAG and GlobalISel and
; are checked under separate prefixes.
;
; Every mask index selects from the first shufflevector operand only, so the
; second operand is never read. It is written as poison rather than the
; deprecated undef placeholder.
332define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
333; GFX1100-LABEL: v_mad_mix_v2f32_shuffle:
334; GFX1100:       ; %bb.0:
335; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336; GFX1100-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
337; GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
338; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2)
339; GFX1100-NEXT:    v_mov_b32_e32 v0, v3
340; GFX1100-NEXT:    s_setpc_b64 s[30:31]
341;
342; GFX900-LABEL: v_mad_mix_v2f32_shuffle:
343; GFX900:       ; %bb.0:
344; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; GFX900-NEXT:    v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
346; GFX900-NEXT:    v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
347; GFX900-NEXT:    v_mov_b32_e32 v0, v3
348; GFX900-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX906-LABEL: v_mad_mix_v2f32_shuffle:
351; GFX906:       ; %bb.0:
352; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX906-NEXT:    v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1]
354; GFX906-NEXT:    v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1]
355; GFX906-NEXT:    v_mov_b32_e32 v0, v3
356; GFX906-NEXT:    s_setpc_b64 s[30:31]
357;
358; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle:
359; GFX9GEN:       ; %bb.0:
360; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
361; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
362; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v4, v0
363; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v1
364; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
365; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
366; GFX9GEN-NEXT:    v_mad_f32 v0, v3, v0, v2
367; GFX9GEN-NEXT:    v_mac_f32_e32 v2, v4, v1
368; GFX9GEN-NEXT:    v_mov_b32_e32 v1, v2
369; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
370;
371; VI-LABEL: v_mad_mix_v2f32_shuffle:
372; VI:       ; %bb.0:
373; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374; VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
375; VI-NEXT:    v_cvt_f32_f16_e32 v4, v0
376; VI-NEXT:    v_cvt_f32_f16_e32 v0, v1
377; VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
378; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
379; VI-NEXT:    v_mad_f32 v0, v3, v0, v2
380; VI-NEXT:    v_mac_f32_e32 v2, v4, v1
381; VI-NEXT:    v_mov_b32_e32 v1, v2
382; VI-NEXT:    s_setpc_b64 s[30:31]
383;
384; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle:
385; SDAG-CI:       ; %bb.0:
386; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
388; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
389; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
390; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
391; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
392; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
393; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
394; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v4, v0
395; SDAG-CI-NEXT:    v_mad_f32 v0, v1, v2, v5
396; SDAG-CI-NEXT:    v_mad_f32 v1, v4, v3, v5
397; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
398;
399; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle:
400; GISEL-CI:       ; %bb.0:
401; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
402; GISEL-CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
403; GISEL-CI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
404; GISEL-CI-NEXT:    v_or_b32_e32 v0, v1, v0
405; GISEL-CI-NEXT:    v_lshlrev_b32_e32 v1, 16, v5
406; GISEL-CI-NEXT:    v_and_b32_e32 v4, 0xffff, v4
407; GISEL-CI-NEXT:    v_or_b32_e32 v1, v1, v4
408; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v0
409; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
410; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v4, v4
411; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v5, v0
412; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
413; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
414; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v3
415; GISEL-CI-NEXT:    v_mad_f32 v0, v4, v0, v1
416; GISEL-CI-NEXT:    v_mac_f32_e32 v1, v5, v2
417; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Rearrange elements per operand; only the first vector operand is selected.
418  %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0>
419  %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1>
420  %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1>
; Widen the shuffled vectors and multiply-add in float.
421  %src0.ext = fpext <2 x half> %src0.shuf to <2 x float>
422  %src1.ext = fpext <2 x half> %src1.shuf to <2 x float>
423  %src2.ext = fpext <2 x half> %src2.shuf to <2 x float>
424  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext)
425  ret <2 x float> %result
426}
427
; Negated first operand. src0 is widened to float and then negated with fneg
; before being passed to llvm.fmuladd.f32. On the gfx1100, gfx900 and gfx906
; runs the assertions expect the negation to appear as a -v0 source modifier
; on the mix instruction. On the gfx9-generic and fiji runs the two selectors
; differ (SelectionDAG puts -v0 on v_mad_f32, GlobalISel puts it on the
; v_cvt_f32_f16_e64 conversion and then uses v_mac_f32), so they are checked
; under selector-specific prefixes, as are the hawaii runs.
428define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
429; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
430; GFX1100:       ; %bb.0:
431; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
432; GFX1100-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
433; GFX1100-NEXT:    s_setpc_b64 s[30:31]
434;
435; GFX900-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
436; GFX900:       ; %bb.0:
437; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438; GFX900-NEXT:    v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
439; GFX900-NEXT:    s_setpc_b64 s[30:31]
440;
441; GFX906-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
442; GFX906:       ; %bb.0:
443; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
444; GFX906-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
445; GFX906-NEXT:    s_setpc_b64 s[30:31]
446;
447; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
448; SDAG-GFX9GEN:       ; %bb.0:
449; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
450; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
451; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
452; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
453; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
454; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
455;
456; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
457; SDAG-VI:       ; %bb.0:
458; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
460; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
461; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
462; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
463; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
464;
465; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
466; SDAG-CI:       ; %bb.0:
467; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468; SDAG-CI-NEXT:    v_mad_f32 v0, -v0, v1, v2
469; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
470;
471; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
472; GISEL-GFX9GEN:       ; %bb.0:
473; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e64 v3, -v0
475; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
476; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
477; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
478; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
479;
480; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
481; GISEL-VI:       ; %bb.0:
482; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483; GISEL-VI-NEXT:    v_cvt_f32_f16_e64 v3, -v0
484; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
485; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
486; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
487; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
488;
489; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
490; GISEL-CI:       ; %bb.0:
491; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v3, -v0
493; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
494; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
495; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
496; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
497  %src0.ext = fpext half %src0 to float
498  %src1.ext = fpext half %src1 to float
499  %src2.ext = fpext half %src2 to float
; The negation is applied to the float value, after the widening.
500  %src0.ext.neg = fneg float %src0.ext
501  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
502  ret float %result
503}
504
; Absolute value of the first operand. src0 is widened to float and passed
; through llvm.fabs.f32 before the llvm.fmuladd.f32 call. On the gfx1100,
; gfx900 and gfx906 runs the assertions expect the |v0| source modifier on
; the mix instruction. The gfx9-generic, fiji and GlobalISel hawaii runs
; expect three conversions followed by v_mad_f32 with |v0|; the SelectionDAG
; hawaii run expects the v_mad_f32 alone.
505define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
506; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
507; GFX1100:       ; %bb.0:
508; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
510; GFX1100-NEXT:    s_setpc_b64 s[30:31]
511;
512; GFX900-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
513; GFX900:       ; %bb.0:
514; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
515; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
516; GFX900-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX906-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
519; GFX906:       ; %bb.0:
520; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
522; GFX906-NEXT:    s_setpc_b64 s[30:31]
523;
524; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
525; GFX9GEN:       ; %bb.0:
526; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
527; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
528; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
529; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
530; GFX9GEN-NEXT:    v_mad_f32 v0, |v0|, v1, v2
531; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
532;
533; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
534; VI:       ; %bb.0:
535; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
537; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
538; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
539; VI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
540; VI-NEXT:    s_setpc_b64 s[30:31]
541;
542; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
543; SDAG-CI:       ; %bb.0:
544; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545; SDAG-CI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
546; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
547;
548; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
549; GISEL-CI:       ; %bb.0:
550; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
551; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
552; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
553; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
554; GISEL-CI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
555; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
556  %src0.ext = fpext half %src0 to float
557  %src1.ext = fpext half %src1 to float
558  %src2.ext = fpext half %src2 to float
; The absolute value is taken on the float value, after the widening.
559  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
560  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
561  ret float %result
562}
563
; Negated absolute value of the first operand. src0 is widened to float, run
; through llvm.fabs.f32 and then negated with fneg before the
; llvm.fmuladd.f32 call. On the gfx1100, gfx900 and gfx906 runs the
; assertions expect the combined -|v0| source modifier on the mix
; instruction. The gfx9-generic, fiji and GlobalISel hawaii runs expect three
; conversions followed by v_mad_f32 with -|v0|; the SelectionDAG hawaii run
; expects the v_mad_f32 alone.
564define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 {
565; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
566; GFX1100:       ; %bb.0:
567; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568; GFX1100-NEXT:    v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
569; GFX1100-NEXT:    s_setpc_b64 s[30:31]
570;
571; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
572; GFX900:       ; %bb.0:
573; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
574; GFX900-NEXT:    v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
575; GFX900-NEXT:    s_setpc_b64 s[30:31]
576;
577; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
578; GFX906:       ; %bb.0:
579; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580; GFX906-NEXT:    v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
581; GFX906-NEXT:    s_setpc_b64 s[30:31]
582;
583; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
584; GFX9GEN:       ; %bb.0:
585; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
587; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
588; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
589; GFX9GEN-NEXT:    v_mad_f32 v0, -|v0|, v1, v2
590; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
591;
592; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
593; VI:       ; %bb.0:
594; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
596; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
597; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
598; VI-NEXT:    v_mad_f32 v0, -|v0|, v1, v2
599; VI-NEXT:    s_setpc_b64 s[30:31]
600;
601; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
602; SDAG-CI:       ; %bb.0:
603; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604; SDAG-CI-NEXT:    v_mad_f32 v0, -|v0|, v1, v2
605; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
606;
607; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
608; GISEL-CI:       ; %bb.0:
609; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
611; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
612; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
613; GISEL-CI-NEXT:    v_mad_f32 v0, -|v0|, v1, v2
614; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
615  %src0.ext = fpext half %src0 to float
616  %src1.ext = fpext half %src1 to float
617  %src2.ext = fpext half %src2 to float
; fabs first, then fneg, both on the widened float value.
618  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
619  %src0.ext.neg.abs = fneg float %src0.ext.abs
620  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
621  ret float %result
622}
623
; Mixed-precision operands. src0 and src1 are half values widened with fpext,
; while src2 is already a float and is passed to llvm.fmuladd.f32 unchanged.
; On the gfx1100, gfx900 and gfx906 runs the assertions expect the mix
; instruction with op_sel_hi:[1,1,0], i.e. the third entry differs from the
; all-half tests. The gfx9-generic, fiji and GlobalISel hawaii runs expect
; only two conversions followed by v_mad_f32; the SelectionDAG hawaii run
; expects the v_mad_f32 alone.
624define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 {
625; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
626; GFX1100:       ; %bb.0:
627; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
628; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
629; GFX1100-NEXT:    s_setpc_b64 s[30:31]
630;
631; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
632; GFX900:       ; %bb.0:
633; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
635; GFX900-NEXT:    s_setpc_b64 s[30:31]
636;
637; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
638; GFX906:       ; %bb.0:
639; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
640; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
641; GFX906-NEXT:    s_setpc_b64 s[30:31]
642;
643; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
644; GFX9GEN:       ; %bb.0:
645; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
646; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
647; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
648; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2
649; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
650;
651; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
652; VI:       ; %bb.0:
653; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
655; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
656; VI-NEXT:    v_mad_f32 v0, v0, v1, v2
657; VI-NEXT:    s_setpc_b64 s[30:31]
658;
659; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
660; SDAG-CI:       ; %bb.0:
661; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
662; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
663; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
664;
665; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
666; GISEL-CI:       ; %bb.0:
667; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
668; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
669; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
670; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
671; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Only the two half operands are widened; src2 is used as given.
672  %src0.ext = fpext half %src0 to float
673  %src1.ext = fpext half %src1 to float
674  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
675  ret float %result
676}
677
; Same shape as the f16/f16/f32 fmuladd case, but the f32 addend is negated
; with fneg before the fmuladd. Every run line expects the negation to appear
; as a "-v2" source modifier on the multiply-add instruction rather than as a
; separate instruction.
678define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
679; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
680; GFX1100:       ; %bb.0:
681; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
682; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
683; GFX1100-NEXT:    s_setpc_b64 s[30:31]
684;
685; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
686; GFX900:       ; %bb.0:
687; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
689; GFX900-NEXT:    s_setpc_b64 s[30:31]
690;
691; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
692; GFX906:       ; %bb.0:
693; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
694; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
695; GFX906-NEXT:    s_setpc_b64 s[30:31]
696;
697; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
698; GFX9GEN:       ; %bb.0:
699; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
701; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
702; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, -v2
703; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
704;
705; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
706; VI:       ; %bb.0:
707; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
709; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
710; VI-NEXT:    v_mad_f32 v0, v0, v1, -v2
711; VI-NEXT:    s_setpc_b64 s[30:31]
712;
713; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
714; SDAG-CI:       ; %bb.0:
715; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, -v2
717; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
718;
719; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
720; GISEL-CI:       ; %bb.0:
721; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
722; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
723; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
724; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, -v2
725; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
726  %src0.ext = fpext half %src0 to float
727  %src1.ext = fpext half %src1 to float
728  %src2.neg = fneg float %src2
729  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
730  ret float %result
731}
732
; f16 * f16 + f32 fmuladd where the f32 addend first goes through
; llvm.fabs.f32. Every run line expects the absolute value to appear as a
; "|v2|" source modifier on the multiply-add instruction rather than as a
; separate instruction.
733define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 {
734; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
735; GFX1100:       ; %bb.0:
736; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
738; GFX1100-NEXT:    s_setpc_b64 s[30:31]
739;
740; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
741; GFX900:       ; %bb.0:
742; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
744; GFX900-NEXT:    s_setpc_b64 s[30:31]
745;
746; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
747; GFX906:       ; %bb.0:
748; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
749; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
750; GFX906-NEXT:    s_setpc_b64 s[30:31]
751;
752; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
753; GFX9GEN:       ; %bb.0:
754; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
755; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
756; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
757; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, |v2|
758; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
759;
760; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
761; VI:       ; %bb.0:
762; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
764; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
765; VI-NEXT:    v_mad_f32 v0, v0, v1, |v2|
766; VI-NEXT:    s_setpc_b64 s[30:31]
767;
768; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
769; SDAG-CI:       ; %bb.0:
770; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, |v2|
772; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
773;
774; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
775; GISEL-CI:       ; %bb.0:
776; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
778; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
779; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, |v2|
780; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
781  %src0.ext = fpext half %src0 to float
782  %src1.ext = fpext half %src1 to float
783  %src2.abs = call float @llvm.fabs.f32(float %src2)
784  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs)
785  ret float %result
786}
787
; f16 * f16 + f32 fmuladd where the f32 addend is fneg(fabs(src2)). Every run
; line expects both operations to appear together as a "-|v2|" source
; modifier on the multiply-add instruction rather than as separate
; instructions.
788define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 {
789; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
790; GFX1100:       ; %bb.0:
791; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
793; GFX1100-NEXT:    s_setpc_b64 s[30:31]
794;
795; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
796; GFX900:       ; %bb.0:
797; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
799; GFX900-NEXT:    s_setpc_b64 s[30:31]
800;
801; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
802; GFX906:       ; %bb.0:
803; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
804; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
805; GFX906-NEXT:    s_setpc_b64 s[30:31]
806;
807; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
808; GFX9GEN:       ; %bb.0:
809; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
810; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
811; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
812; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
813; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
814;
815; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
816; VI:       ; %bb.0:
817; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
818; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
819; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
820; VI-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
821; VI-NEXT:    s_setpc_b64 s[30:31]
822;
823; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
824; SDAG-CI:       ; %bb.0:
825; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
826; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
827; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
828;
829; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
830; GISEL-CI:       ; %bb.0:
831; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
832; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
833; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
834; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, -|v2|
835; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
836  %src0.ext = fpext half %src0 to float
837  %src1.ext = fpext half %src1 to float
838  %src2.abs = call float @llvm.fabs.f32(float %src2)
839  %src2.neg.abs = fneg float %src2.abs
840  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
841  ret float %result
842}
843
844; TODO: Fold inline immediates. Need to be careful because it is an
845; f16 inline immediate that may be converted to f32, not an actual f32
846; inline immediate.
847
; f16 * f16 + constant, where the addend is the f32 constant 1.0. On the
; targets that select v_mad_mix_f32 / v_fma_mix_f32 the checks currently
; expect 1.0 to be materialized into a register first (s_mov_b32 with
; SelectionDAG, v_mov_b32 with GlobalISel) instead of being used directly as
; an operand of the mix instruction. The v_mad_f32 fallbacks take 1.0 as a
; direct operand.
848define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
849; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
850; SDAG-GFX1100:       ; %bb.0:
851; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
852; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 1.0
853; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
854; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
855; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
856;
857; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
858; SDAG-GFX900:       ; %bb.0:
859; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
860; SDAG-GFX900-NEXT:    s_mov_b32 s4, 1.0
861; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
862; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
863;
864; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
865; SDAG-GFX906:       ; %bb.0:
866; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
867; SDAG-GFX906-NEXT:    s_mov_b32 s4, 1.0
868; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
869; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
870;
871; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
872; GFX9GEN:       ; %bb.0:
873; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
874; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
875; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
876; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, 1.0
877; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
878;
879; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
880; VI:       ; %bb.0:
881; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
882; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
883; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
884; VI-NEXT:    v_mad_f32 v0, v0, v1, 1.0
885; VI-NEXT:    s_setpc_b64 s[30:31]
886;
887; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
888; SDAG-CI:       ; %bb.0:
889; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, 1.0
891; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
892;
893; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
894; GISEL-GFX1100:       ; %bb.0:
895; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 1.0
897; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
898; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
899; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
900;
901; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
902; GISEL-GFX900:       ; %bb.0:
903; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 1.0
905; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
906; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
907;
908; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
909; GISEL-GFX906:       ; %bb.0:
910; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 1.0
912; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
913; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
914;
915; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
916; GISEL-CI:       ; %bb.0:
917; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
918; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
919; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
920; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, 1.0
921; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
922  %src0.ext = fpext half %src0 to float
923  %src1.ext = fpext half %src1 to float
924  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0)
925  ret float %result
926}
927
; f16 * f16 + constant, where the addend is an f32 constant that the expected
; output prints as 0.15915494 (the f32 1/2pi value). The mix-instruction
; targets expect it to be moved into a register first; gfx9-generic and fiji
; expect it as a direct v_mad_f32 operand; hawaii expects the raw literal
; 0x3e22f983 instead (v_madak_f32 with SelectionDAG, v_mov_b32 + v_mac_f32
; with GlobalISel).
928define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 {
929; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
930; SDAG-GFX1100:       ; %bb.0:
931; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
932; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0.15915494
933; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
934; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
935; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
936;
937; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
938; SDAG-GFX900:       ; %bb.0:
939; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
940; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0.15915494
941; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
942; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
943;
944; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
945; SDAG-GFX906:       ; %bb.0:
946; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
947; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0.15915494
948; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
949; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
950;
951; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
952; GFX9GEN:       ; %bb.0:
953; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
954; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
955; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
956; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, 0.15915494
957; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
958;
959; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
960; VI:       ; %bb.0:
961; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
963; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
964; VI-NEXT:    v_mad_f32 v0, v0, v1, 0.15915494
965; VI-NEXT:    s_setpc_b64 s[30:31]
966;
967; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
968; SDAG-CI:       ; %bb.0:
969; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v1, 0x3e22f983
971; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
972;
973; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
974; GISEL-GFX1100:       ; %bb.0:
975; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 0.15915494
977; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
978; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
979; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
980;
981; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
982; GISEL-GFX900:       ; %bb.0:
983; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 0.15915494
985; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
986; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
987;
988; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
989; GISEL-GFX906:       ; %bb.0:
990; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
991; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 0.15915494
992; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
993; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
994;
995; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
996; GISEL-CI:       ; %bb.0:
997; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
998; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v0
999; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1000; GISEL-CI-NEXT:    v_mov_b32_e32 v0, 0x3e22f983
1001; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v2, v1
1002; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1003  %src0.ext = fpext half %src0 to float
1004  %src1.ext = fpext half %src1 to float
; The addend below is the constant that the expected output shows as
; 0.15915494 / 0x3e22f983.
1005  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000)
1006  ret float %result
1007}
1008
1009; Attempt to break inline immediate folding. If the operand is
1010; interpreted as f32, the inline immediate is really the f16 inline
1011; imm value converted to f32.
1012;	fpext f16 1/2pi = 0x3e230000
1013;	      f32 1/2pi = 0x3e22f983
1014
; f16 * f16 + fpext(f16 constant), where the constant is the half value
; 0xH3118. Its f32 extension is 0x3e230000, which differs from the f32 value
; 0x3e22f983 that prints as 0.15915494. Every run line therefore expects the
; raw 32-bit literal 0x3e230000 in the output and never the 0.15915494
; spelling.
1015define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 {
1016; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1017; SDAG-GFX1100:       ; %bb.0:
1018; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1019; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0x3e230000
1020; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1021; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
1022; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1023;
1024; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1025; SDAG-GFX900:       ; %bb.0:
1026; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0x3e230000
1028; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1029; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
1030;
1031; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1032; SDAG-GFX906:       ; %bb.0:
1033; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1034; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0x3e230000
1035; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1036; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
1037;
1038; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1039; SDAG-GFX9GEN:       ; %bb.0:
1040; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1041; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1042; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1043; SDAG-GFX9GEN-NEXT:    v_madak_f32 v0, v0, v1, 0x3e230000
1044; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1045;
1046; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1047; SDAG-VI:       ; %bb.0:
1048; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1049; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1050; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1051; SDAG-VI-NEXT:    v_madak_f32 v0, v0, v1, 0x3e230000
1052; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
1053;
1054; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1055; SDAG-CI:       ; %bb.0:
1056; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v1, 0x3e230000
1058; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1059;
1060; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1061; GISEL-GFX1100:       ; %bb.0:
1062; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 0x3e230000
1064; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1065; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1066; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1067;
1068; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1069; GISEL-GFX900:       ; %bb.0:
1070; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 0x3e230000
1072; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1073; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
1074;
1075; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1076; GISEL-GFX906:       ; %bb.0:
1077; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1078; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 0x3e230000
1079; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1080; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
1081;
1082; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1083; GISEL-GFX9GEN:       ; %bb.0:
1084; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1085; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
1086; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1087; GISEL-GFX9GEN-NEXT:    v_mov_b32_e32 v0, 0x3e230000
1088; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v2, v1
1089; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1090;
1091; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1092; GISEL-VI:       ; %bb.0:
1093; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1094; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1095; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1096; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x3e230000
1097; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v2, v1
1098; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
1099;
1100; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
1101; GISEL-CI:       ; %bb.0:
1102; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1103; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1104; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1105; GISEL-CI-NEXT:    v_mov_b32_e32 v0, 0x3e230000
1106; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v2, v1
1107; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1108  %src0.ext = fpext half %src0 to float
1109  %src1.ext = fpext half %src1 to float
; The addend is a constant fpext of an f16 value, not an f32 constant; the
; expected output shows it folded to the f32 literal 0x3e230000.
1110  %src2 = fpext half 0xH3118 to float
1111  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
1112  ret float %result
1113}
1114
1115
; f16 * f16 + fpext(f16 constant), where the constant is the half with bit
; pattern 0x003F (63 when read as an integer). Every run line expects the
; extended value to appear as the raw 32-bit literal 0x367c0000, either moved
; into a register or used as the v_madak_f32 constant.
; NOTE(review): presumably chosen because 63 is also an integer inline
; immediate, so a bit-pattern fold would be wrong - confirm.
1116define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
1117; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1118; SDAG-GFX1100:       ; %bb.0:
1119; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1120; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0x367c0000
1121; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1122; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0]
1123; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1124;
1125; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1126; SDAG-GFX900:       ; %bb.0:
1127; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1128; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0x367c0000
1129; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1130; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
1131;
1132; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1133; SDAG-GFX906:       ; %bb.0:
1134; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1135; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0x367c0000
1136; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1137; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
1138;
1139; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1140; SDAG-GFX9GEN:       ; %bb.0:
1141; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1142; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1143; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1144; SDAG-GFX9GEN-NEXT:    v_madak_f32 v0, v0, v1, 0x367c0000
1145; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1146;
1147; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1148; SDAG-VI:       ; %bb.0:
1149; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1151; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1152; SDAG-VI-NEXT:    v_madak_f32 v0, v0, v1, 0x367c0000
1153; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
1154;
1155; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1156; SDAG-CI:       ; %bb.0:
1157; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v1, 0x367c0000
1159; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1160;
1161; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1162; GISEL-GFX1100:       ; %bb.0:
1163; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1164; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v2, 0x367c0000
1165; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1166; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1167; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1168;
1169; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1170; GISEL-GFX900:       ; %bb.0:
1171; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172; GISEL-GFX900-NEXT:    v_mov_b32_e32 v2, 0x367c0000
1173; GISEL-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1174; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
1175;
1176; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1177; GISEL-GFX906:       ; %bb.0:
1178; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1179; GISEL-GFX906-NEXT:    v_mov_b32_e32 v2, 0x367c0000
1180; GISEL-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1181; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
1182;
1183; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1184; GISEL-GFX9GEN:       ; %bb.0:
1185; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1186; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
1187; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1188; GISEL-GFX9GEN-NEXT:    v_mov_b32_e32 v0, 0x367c0000
1189; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v2, v1
1190; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1191;
1192; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1193; GISEL-VI:       ; %bb.0:
1194; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1195; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1196; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1197; GISEL-VI-NEXT:    v_mov_b32_e32 v0, 0x367c0000
1198; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v2, v1
1199; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
1200;
1201; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
1202; GISEL-CI:       ; %bb.0:
1203; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1204; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1205; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1206; GISEL-CI-NEXT:    v_mov_b32_e32 v0, 0x367c0000
1207; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v2, v1
1208; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1209  %src0.ext = fpext half %src0 to float
1210  %src1.ext = fpext half %src1 to float
; The addend is a constant fpext of the f16 bit pattern 0x003F; the expected
; output shows it folded to the f32 literal 0x367c0000.
1211  %src2 = fpext half 0xH003F to float
1212  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
1213  ret float %result
1214}
1215
; Vector form of the f32 1.0 addend case: both <2 x half> inputs are extended
; to <2 x float> and fed to llvm.fmuladd.v2f32 with a splat of 1.0.
; On the mix-instruction targets the checks expect two mix instructions that
; share one register holding 1.0, one of them carrying op_sel:[1,1,0], plus a
; final v_mov_b32 to place the result. gfx9-generic and fiji expect
; v_cvt_f32_f16 conversions (two of them SDWA with src0_sel:WORD_1) followed
; by two v_mad_f32 with a direct 1.0 operand. On hawaii the expected code
; reads v0-v3 as separate inputs; the SelectionDAG variant additionally
; expects a v_cvt_f16_f32 before each v_cvt_f32_f16.
1216define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 {
1217; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1218; SDAG-GFX1100:       ; %bb.0:
1219; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1220; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 1.0
1221; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1222; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1223; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1224; SDAG-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
1225; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1226;
1227; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1228; SDAG-GFX900:       ; %bb.0:
1229; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1230; SDAG-GFX900-NEXT:    s_mov_b32 s4, 1.0
1231; SDAG-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1232; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1233; SDAG-GFX900-NEXT:    v_mov_b32_e32 v1, v2
1234; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
1235;
1236; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1237; SDAG-GFX906:       ; %bb.0:
1238; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1239; SDAG-GFX906-NEXT:    s_mov_b32 s4, 1.0
1240; SDAG-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1241; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1242; SDAG-GFX906-NEXT:    v_mov_b32_e32 v1, v2
1243; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
1244;
1245; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
1246; SDAG-GFX9GEN:       ; %bb.0:
1247; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1248; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1249; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1250; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v1
1251; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1252; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, v0, v3, 1.0
1253; SDAG-GFX9GEN-NEXT:    v_mad_f32 v1, v2, v1, 1.0
1254; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1255;
1256; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1257; SDAG-VI:       ; %bb.0:
1258; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1259; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1260; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1261; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
1262; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1263; SDAG-VI-NEXT:    v_mad_f32 v0, v0, v3, 1.0
1264; SDAG-VI-NEXT:    v_mad_f32 v1, v2, v1, 1.0
1265; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
1266;
1267; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1268; SDAG-CI:       ; %bb.0:
1269; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1270; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1271; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1272; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1273; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1274; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1275; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1276; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1277; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1278; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v2, 1.0
1279; SDAG-CI-NEXT:    v_mad_f32 v1, v1, v3, 1.0
1280; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1281;
1282; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1:
1283; GISEL-GFX1100:       ; %bb.0:
1284; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1285; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v3, 1.0
1286; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1287; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1288; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1289; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
1290; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1291;
1292; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1:
1293; GISEL-GFX900:       ; %bb.0:
1294; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295; GISEL-GFX900-NEXT:    v_mov_b32_e32 v3, 1.0
1296; GISEL-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1297; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1298; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v2
1299; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
1300;
1301; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1:
1302; GISEL-GFX906:       ; %bb.0:
1303; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1304; GISEL-GFX906-NEXT:    v_mov_b32_e32 v3, 1.0
1305; GISEL-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1306; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1307; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v2
1308; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
1309;
1310; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
1311; GISEL-GFX9GEN:       ; %bb.0:
1312; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1313; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
1314; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1315; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v1
1316; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1317; GISEL-GFX9GEN-NEXT:    v_mad_f32 v0, v2, v0, 1.0
1318; GISEL-GFX9GEN-NEXT:    v_mad_f32 v1, v3, v1, 1.0
1319; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1320;
1321; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
1322; GISEL-VI:       ; %bb.0:
1323; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1324; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1325; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1326; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v1
1327; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1328; GISEL-VI-NEXT:    v_mad_f32 v0, v2, v0, 1.0
1329; GISEL-VI-NEXT:    v_mad_f32 v1, v3, v1, 1.0
1330; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
1331;
1332; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1:
1333; GISEL-CI:       ; %bb.0:
1334; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1335; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1336; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1337; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1338; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1339; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v2, 1.0
1340; GISEL-CI-NEXT:    v_mad_f32 v1, v1, v3, 1.0
1341; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1342  %src0.ext = fpext <2 x half> %src0 to <2 x float>
1343  %src1.ext = fpext <2 x half> %src1 to <2 x float>
1344  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>)
1345  ret <2 x float> %result
1346}
1347
; <2 x float> fmuladd of two fpext'ed <2 x half> operands. The addend is the
; half constant 0xH3118 (0.1591796875, the f16 rounding of 1/(2*pi)) extended
; to float in both lanes. The checks expect the extended value to show up as
; the 32-bit literal 0x3e230000 on every target.
1348define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1349; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1350; SDAG-GFX1100:       ; %bb.0:
1351; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1352; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0x3e230000
1353; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1354; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1355; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1356; SDAG-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
1357; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1358;
1359; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1360; SDAG-GFX900:       ; %bb.0:
1361; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1362; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0x3e230000
1363; SDAG-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1364; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1365; SDAG-GFX900-NEXT:    v_mov_b32_e32 v1, v2
1366; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
1367;
1368; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1369; SDAG-GFX906:       ; %bb.0:
1370; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1371; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0x3e230000
1372; SDAG-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1373; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1374; SDAG-GFX906-NEXT:    v_mov_b32_e32 v1, v2
1375; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1378; SDAG-GFX9GEN:       ; %bb.0:
1379; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1381; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1382; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v1
1383; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1384; SDAG-GFX9GEN-NEXT:    v_mov_b32_e32 v1, 0x3e230000
1385; SDAG-GFX9GEN-NEXT:    v_madak_f32 v0, v0, v3, 0x3e230000
1386; SDAG-GFX9GEN-NEXT:    v_mac_f32_e32 v1, v2, v4
1387; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1388;
1389; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1390; SDAG-VI:       ; %bb.0:
1391; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1393; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1394; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
1395; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1396; SDAG-VI-NEXT:    v_mov_b32_e32 v1, 0x3e230000
1397; SDAG-VI-NEXT:    v_madak_f32 v0, v0, v3, 0x3e230000
1398; SDAG-VI-NEXT:    v_mac_f32_e32 v1, v2, v4
1399; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
1400;
1401; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1402; SDAG-CI:       ; %bb.0:
1403; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1404; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1405; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1406; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1407; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1408; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1409; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1410; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1411; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v4, v1
1412; SDAG-CI-NEXT:    v_mov_b32_e32 v1, 0x3e230000
1413; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v2, 0x3e230000
1414; SDAG-CI-NEXT:    v_mac_f32_e32 v1, v4, v3
1415; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1416;
1417; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1418; GISEL-GFX1100:       ; %bb.0:
1419; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1420; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v3, 0x3e230000
1421; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1422; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1423; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1424; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
1425; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1426;
1427; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1428; GISEL-GFX900:       ; %bb.0:
1429; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1430; GISEL-GFX900-NEXT:    v_mov_b32_e32 v3, 0x3e230000
1431; GISEL-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1432; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1433; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v2
1434; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
1435;
1436; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1437; GISEL-GFX906:       ; %bb.0:
1438; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1439; GISEL-GFX906-NEXT:    v_mov_b32_e32 v3, 0x3e230000
1440; GISEL-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1441; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1442; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v2
1443; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
1444;
1445; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1446; GISEL-GFX9GEN:       ; %bb.0:
1447; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1448; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
1449; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1450; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v1
1451; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1452; GISEL-GFX9GEN-NEXT:    v_mov_b32_e32 v1, 0x3e230000
1453; GISEL-GFX9GEN-NEXT:    v_madak_f32 v0, v2, v0, 0x3e230000
1454; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v1, v3, v4
1455; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1456;
1457; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1458; GISEL-VI:       ; %bb.0:
1459; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1460; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1461; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1462; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v1
1463; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1464; GISEL-VI-NEXT:    v_mov_b32_e32 v1, 0x3e230000
1465; GISEL-VI-NEXT:    v_madak_f32 v0, v2, v0, 0x3e230000
1466; GISEL-VI-NEXT:    v_mac_f32_e32 v1, v3, v4
1467; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
1468;
1469; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
1470; GISEL-CI:       ; %bb.0:
1471; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1472; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1473; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v4, v1
1474; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1475; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1476; GISEL-CI-NEXT:    v_mov_b32_e32 v1, 0x3e230000
1477; GISEL-CI-NEXT:    v_madak_f32 v0, v0, v2, 0x3e230000
1478; GISEL-CI-NEXT:    v_mac_f32_e32 v1, v4, v3
1479; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1480  %src0.ext = fpext <2 x half> %src0 to <2 x float>
1481  %src1.ext = fpext <2 x half> %src1 to <2 x float>
1482  %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float>
1483  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2)
1484  ret <2 x float> %result
1485}
1486
; <2 x float> fmuladd of two fpext'ed <2 x half> operands. The addend is the
; f32 constant 0x3FC45F3060000000 in both lanes, which the checks print as
; 0.15915494 on most targets and as the literal 0x3e22f983 on CI.
1487define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 {
1488; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1489; SDAG-GFX1100:       ; %bb.0:
1490; SDAG-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1491; SDAG-GFX1100-NEXT:    s_mov_b32 s0, 0.15915494
1492; SDAG-GFX1100-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1493; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0]
1494; SDAG-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1495; SDAG-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
1496; SDAG-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1497;
1498; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1499; SDAG-GFX900:       ; %bb.0:
1500; SDAG-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1501; SDAG-GFX900-NEXT:    s_mov_b32 s4, 0.15915494
1502; SDAG-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1503; SDAG-GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1504; SDAG-GFX900-NEXT:    v_mov_b32_e32 v1, v2
1505; SDAG-GFX900-NEXT:    s_setpc_b64 s[30:31]
1506;
1507; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1508; SDAG-GFX906:       ; %bb.0:
1509; SDAG-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510; SDAG-GFX906-NEXT:    s_mov_b32 s4, 0.15915494
1511; SDAG-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1512; SDAG-GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
1513; SDAG-GFX906-NEXT:    v_mov_b32_e32 v1, v2
1514; SDAG-GFX906-NEXT:    s_setpc_b64 s[30:31]
1515;
1516; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1517; SDAG-GFX9GEN:       ; %bb.0:
1518; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1519; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1520; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1521; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v1
1522; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1523; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, v0, v3, 0.15915494
1524; SDAG-GFX9GEN-NEXT:    v_mad_f32 v1, v2, v1, 0.15915494
1525; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1526;
1527; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1528; SDAG-VI:       ; %bb.0:
1529; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1530; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1531; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1532; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v3, v1
1533; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1534; SDAG-VI-NEXT:    v_mad_f32 v0, v0, v3, 0.15915494
1535; SDAG-VI-NEXT:    v_mad_f32 v1, v2, v1, 0.15915494
1536; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
1537;
1538; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1539; SDAG-CI:       ; %bb.0:
1540; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1541; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
1542; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
1543; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
1544; SDAG-CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
1545; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1546; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1547; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1548; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v4, v1
1549; SDAG-CI-NEXT:    v_mov_b32_e32 v1, 0x3e22f983
1550; SDAG-CI-NEXT:    v_madak_f32 v0, v0, v2, 0x3e22f983
1551; SDAG-CI-NEXT:    v_mac_f32_e32 v1, v4, v3
1552; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1553;
1554; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1555; GISEL-GFX1100:       ; %bb.0:
1556; GISEL-GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1557; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v3, 0.15915494
1558; GISEL-GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
1559; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1560; GISEL-GFX1100-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1561; GISEL-GFX1100-NEXT:    v_mov_b32_e32 v0, v2
1562; GISEL-GFX1100-NEXT:    s_setpc_b64 s[30:31]
1563;
1564; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1565; GISEL-GFX900:       ; %bb.0:
1566; GISEL-GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1567; GISEL-GFX900-NEXT:    v_mov_b32_e32 v3, 0.15915494
1568; GISEL-GFX900-NEXT:    v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1569; GISEL-GFX900-NEXT:    v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1570; GISEL-GFX900-NEXT:    v_mov_b32_e32 v0, v2
1571; GISEL-GFX900-NEXT:    s_setpc_b64 s[30:31]
1572;
1573; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1574; GISEL-GFX906:       ; %bb.0:
1575; GISEL-GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1576; GISEL-GFX906-NEXT:    v_mov_b32_e32 v3, 0.15915494
1577; GISEL-GFX906-NEXT:    v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0]
1578; GISEL-GFX906-NEXT:    v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0]
1579; GISEL-GFX906-NEXT:    v_mov_b32_e32 v0, v2
1580; GISEL-GFX906-NEXT:    s_setpc_b64 s[30:31]
1581;
1582; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1583; GISEL-GFX9GEN:       ; %bb.0:
1584; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1585; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v0
1586; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1587; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v1
1588; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1589; GISEL-GFX9GEN-NEXT:    v_mad_f32 v0, v2, v0, 0.15915494
1590; GISEL-GFX9GEN-NEXT:    v_mad_f32 v1, v3, v1, 0.15915494
1591; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1592;
1593; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1594; GISEL-VI:       ; %bb.0:
1595; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1596; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v2, v0
1597; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1598; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v1
1599; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1600; GISEL-VI-NEXT:    v_mad_f32 v0, v2, v0, 0.15915494
1601; GISEL-VI-NEXT:    v_mad_f32 v1, v3, v1, 0.15915494
1602; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
1603;
1604; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
1605; GISEL-CI:       ; %bb.0:
1606; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1607; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1608; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v4, v1
1609; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1610; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
1611; GISEL-CI-NEXT:    v_mov_b32_e32 v1, 0x3e22f983
1612; GISEL-CI-NEXT:    v_madak_f32 v0, v0, v2, 0x3e22f983
1613; GISEL-CI-NEXT:    v_mac_f32_e32 v1, v4, v3
1614; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1615  %src0.ext = fpext <2 x half> %src0 to <2 x float>
1616  %src1.ext = fpext <2 x half> %src1 to <2 x float>
1618  %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>)
1619  ret <2 x float> %result
1620}
1621
; fmuladd on the high (index 1) elements of three <2 x half> vectors, each
; extended to float, with the result clamped to [0.0, 1.0] through
; maxnum(x, 0.0) followed by minnum(x, 1.0). The checks expect the min/max
; pair to fold into a "clamp" modifier on the mad/fma instruction.
1622define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 {
1623; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1624; GFX1100:       ; %bb.0:
1625; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1626; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1627; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1628;
1629; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1630; GFX900:       ; %bb.0:
1631; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1632; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1633; GFX900-NEXT:    s_setpc_b64 s[30:31]
1634;
1635; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1636; GFX906:       ; %bb.0:
1637; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1638; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
1639; GFX906-NEXT:    s_setpc_b64 s[30:31]
1640;
1641; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1642; GFX9GEN:       ; %bb.0:
1643; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1644; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1645; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1646; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1647; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
1648; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1649;
1650; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1651; VI:       ; %bb.0:
1652; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1653; VI-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1654; VI-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1655; VI-NEXT:    v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1656; VI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
1657; VI-NEXT:    s_setpc_b64 s[30:31]
1658;
1659; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1660; SDAG-CI:       ; %bb.0:
1661; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1662; SDAG-CI-NEXT:    v_mad_f32 v0, v1, v3, v5 clamp
1663; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1664;
1665; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
1666; GISEL-CI:       ; %bb.0:
1667; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1668; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v1
1669; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v3
1670; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v5
1671; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2 clamp
1672; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1673  %src0.hi = extractelement <2 x half> %src0, i32 1
1674  %src1.hi = extractelement <2 x half> %src1, i32 1
1675  %src2.hi = extractelement <2 x half> %src2, i32 1
1676  %src0.ext = fpext half %src0.hi to float
1677  %src1.ext = fpext half %src1.hi to float
1678  %src2.ext = fpext half %src2.hi to float
1679  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
1680  %max = call float @llvm.maxnum.f32(float %result, float 0.0)
1681  %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
1682  ret float %clamp
1683}
1684
; Negative test: a plain f32 fmuladd with no f16 sources. The checks expect an
; ordinary v_fma_f32 (GFX1100, GFX906) or v_mad_f32 (all other targets) and no
; mix instruction.
1685define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
1686; GFX1100-LABEL: no_mix_simple:
1687; GFX1100:       ; %bb.0:
1688; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1689; GFX1100-NEXT:    v_fma_f32 v0, v0, v1, v2
1690; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1691;
1692; GFX900-LABEL: no_mix_simple:
1693; GFX900:       ; %bb.0:
1694; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1695; GFX900-NEXT:    v_mad_f32 v0, v0, v1, v2
1696; GFX900-NEXT:    s_setpc_b64 s[30:31]
1697;
1698; GFX906-LABEL: no_mix_simple:
1699; GFX906:       ; %bb.0:
1700; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1701; GFX906-NEXT:    v_fma_f32 v0, v0, v1, v2
1702; GFX906-NEXT:    s_setpc_b64 s[30:31]
1703;
1704; GFX9GEN-LABEL: no_mix_simple:
1705; GFX9GEN:       ; %bb.0:
1706; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1707; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2
1708; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1709;
1710; VI-LABEL: no_mix_simple:
1711; VI:       ; %bb.0:
1712; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1713; VI-NEXT:    v_mad_f32 v0, v0, v1, v2
1714; VI-NEXT:    s_setpc_b64 s[30:31]
1715;
1716; CI-LABEL: no_mix_simple:
1717; CI:       ; %bb.0:
1718; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1719; CI-NEXT:    v_mad_f32 v0, v0, v1, v2
1720; CI-NEXT:    s_setpc_b64 s[30:31]
1721  %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2)
1722  ret float %result
1723}
1724
; Negative test: a plain f32 fmuladd (no f16 sources) with fabs applied to
; src0. The checks expect the fabs to become the |v0| source modifier on a
; regular v_fma_f32 / v_mad_f32, again with no mix instruction.
1725define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
1726; GFX1100-LABEL: no_mix_simple_fabs:
1727; GFX1100:       ; %bb.0:
1728; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1729; GFX1100-NEXT:    v_fma_f32 v0, |v0|, v1, v2
1730; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1731;
1732; GFX900-LABEL: no_mix_simple_fabs:
1733; GFX900:       ; %bb.0:
1734; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1735; GFX900-NEXT:    v_mad_f32 v0, |v0|, v1, v2
1736; GFX900-NEXT:    s_setpc_b64 s[30:31]
1737;
1738; GFX906-LABEL: no_mix_simple_fabs:
1739; GFX906:       ; %bb.0:
1740; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1741; GFX906-NEXT:    v_fma_f32 v0, |v0|, v1, v2
1742; GFX906-NEXT:    s_setpc_b64 s[30:31]
1743;
1744; GFX9GEN-LABEL: no_mix_simple_fabs:
1745; GFX9GEN:       ; %bb.0:
1746; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1747; GFX9GEN-NEXT:    v_mad_f32 v0, |v0|, v1, v2
1748; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1749;
1750; VI-LABEL: no_mix_simple_fabs:
1751; VI:       ; %bb.0:
1752; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1753; VI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
1754; VI-NEXT:    s_setpc_b64 s[30:31]
1755;
1756; CI-LABEL: no_mix_simple_fabs:
1757; CI:       ; %bb.0:
1758; CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1759; CI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
1760; CI-NEXT:    s_setpc_b64 s[30:31]
1761  %src0.fabs = call float @llvm.fabs.f32(float %src0)
1762  %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2)
1763  ret float %result
1764}
1765
1766; FIXME(DAG): Should be able to select in this case.
1767; All sources are converted from f16, so it doesn't matter
1768; that v_mad_mix_f32 flushes denormals.
1769
; fmuladd of three fpext'ed half operands under attribute group #1.
; NOTE(review): going by the function name, #1 presumably enables f32
; denormals; its definition is outside this section, so confirm there.
; The checks expect v_fma_mix_f32 on GFX1100/GFX906, separate v_cvt_f32_f16
; conversions feeding v_fma_f32 on GFX900/GFX9GEN, and an unfused
; v_mul_f32 + v_add_f32 pair on VI.
1770define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 {
1771; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1772; GFX1100:       ; %bb.0:
1773; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1774; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1775; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1776;
1777; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1778; GFX900:       ; %bb.0:
1779; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1780; GFX900-NEXT:    v_cvt_f32_f16_e32 v0, v0
1781; GFX900-NEXT:    v_cvt_f32_f16_e32 v1, v1
1782; GFX900-NEXT:    v_cvt_f32_f16_e32 v2, v2
1783; GFX900-NEXT:    v_fma_f32 v0, v0, v1, v2
1784; GFX900-NEXT:    s_setpc_b64 s[30:31]
1785;
1786; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1787; GFX906:       ; %bb.0:
1788; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1789; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
1790; GFX906-NEXT:    s_setpc_b64 s[30:31]
1791;
1792; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1793; GFX9GEN:       ; %bb.0:
1794; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1795; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1796; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1797; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
1798; GFX9GEN-NEXT:    v_fma_f32 v0, v0, v1, v2
1799; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1800;
1801; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1802; VI:       ; %bb.0:
1803; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1804; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1805; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1806; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1807; VI-NEXT:    v_mul_f32_e32 v0, v0, v1
1808; VI-NEXT:    v_add_f32_e32 v0, v0, v2
1809; VI-NEXT:    s_setpc_b64 s[30:31]
1810;
1811; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1812; SDAG-CI:       ; %bb.0:
1813; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1814; SDAG-CI-NEXT:    v_fma_f32 v0, v0, v1, v2
1815; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1816;
1817; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
1818; GISEL-CI:       ; %bb.0:
1819; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1820; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1821; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1822; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1823; GISEL-CI-NEXT:    v_fma_f32 v0, v0, v1, v2
1824; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1825  %src0.ext = fpext half %src0 to float
1826  %src1.ext = fpext half %src1 to float
1827  %src2.ext = fpext half %src2 to float
1828  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
1829  ret float %result
1830}
1831
; fmuladd of two fpext'ed half operands and a native float addend, under
; attribute group #1.
; NOTE(review): going by the function name, #1 presumably enables f32
; denormals; its definition is outside this section, so confirm there.
; The checks expect v_fma_mix_f32 with op_sel_hi:[1,1,0] on GFX1100/GFX906,
; v_cvt_f32_f16 conversions feeding v_fma_f32 on GFX900/GFX9GEN, and an
; unfused v_mul_f32 + v_add_f32 pair on VI.
1832define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 {
1833; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1834; GFX1100:       ; %bb.0:
1835; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1836; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1837; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1838;
1839; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1840; GFX900:       ; %bb.0:
1841; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1842; GFX900-NEXT:    v_cvt_f32_f16_e32 v0, v0
1843; GFX900-NEXT:    v_cvt_f32_f16_e32 v1, v1
1844; GFX900-NEXT:    v_fma_f32 v0, v0, v1, v2
1845; GFX900-NEXT:    s_setpc_b64 s[30:31]
1846;
1847; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1848; GFX906:       ; %bb.0:
1849; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1850; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
1851; GFX906-NEXT:    s_setpc_b64 s[30:31]
1852;
1853; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1854; GFX9GEN:       ; %bb.0:
1855; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1856; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1857; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1858; GFX9GEN-NEXT:    v_fma_f32 v0, v0, v1, v2
1859; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1860;
1861; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1862; VI:       ; %bb.0:
1863; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1864; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1865; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1866; VI-NEXT:    v_mul_f32_e32 v0, v0, v1
1867; VI-NEXT:    v_add_f32_e32 v0, v0, v2
1868; VI-NEXT:    s_setpc_b64 s[30:31]
1869;
1870; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1871; SDAG-CI:       ; %bb.0:
1872; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1873; SDAG-CI-NEXT:    v_fma_f32 v0, v0, v1, v2
1874; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1875;
1876; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
1877; GISEL-CI:       ; %bb.0:
1878; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1879; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1880; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1881; GISEL-CI-NEXT:    v_fma_f32 v0, v0, v1, v2
1882; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1883  %src0.ext = fpext half %src0 to float
1884  %src1.ext = fpext half %src1 to float
1885  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2)
1886  ret float %result
1887}
1888
; Three fpext'ed half operands combined with a separate fmul and fadd (no
; fmuladd intrinsic, no fast-math flags), under attribute group #1.
; NOTE(review): going by the function name, #1 presumably enables f32
; denormals; its definition is outside this section, so confirm there.
; The checks expect no fusion on any target: every run line converts the
; inputs as needed and then emits v_mul_f32 followed by v_add_f32.
1889define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 {
1890; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1891; GFX1100:       ; %bb.0:
1892; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1893; GFX1100-NEXT:    v_cvt_f32_f16_e32 v0, v0
1894; GFX1100-NEXT:    v_cvt_f32_f16_e32 v1, v1
1895; GFX1100-NEXT:    v_cvt_f32_f16_e32 v2, v2
1896; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
1897; GFX1100-NEXT:    v_mul_f32_e32 v0, v0, v1
1898; GFX1100-NEXT:    v_add_f32_e32 v0, v0, v2
1899; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1900;
1901; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1902; GFX900:       ; %bb.0:
1903; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1904; GFX900-NEXT:    v_cvt_f32_f16_e32 v0, v0
1905; GFX900-NEXT:    v_cvt_f32_f16_e32 v1, v1
1906; GFX900-NEXT:    v_cvt_f32_f16_e32 v2, v2
1907; GFX900-NEXT:    v_mul_f32_e32 v0, v0, v1
1908; GFX900-NEXT:    v_add_f32_e32 v0, v0, v2
1909; GFX900-NEXT:    s_setpc_b64 s[30:31]
1910;
1911; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1912; GFX906:       ; %bb.0:
1913; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1914; GFX906-NEXT:    v_cvt_f32_f16_e32 v0, v0
1915; GFX906-NEXT:    v_cvt_f32_f16_e32 v1, v1
1916; GFX906-NEXT:    v_cvt_f32_f16_e32 v2, v2
1917; GFX906-NEXT:    v_mul_f32_e32 v0, v0, v1
1918; GFX906-NEXT:    v_add_f32_e32 v0, v0, v2
1919; GFX906-NEXT:    s_setpc_b64 s[30:31]
1920;
1921; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1922; GFX9GEN:       ; %bb.0:
1923; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1924; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1925; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1926; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
1927; GFX9GEN-NEXT:    v_mul_f32_e32 v0, v0, v1
1928; GFX9GEN-NEXT:    v_add_f32_e32 v0, v0, v2
1929; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
1930;
1931; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1932; VI:       ; %bb.0:
1933; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1934; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1935; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1936; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1937; VI-NEXT:    v_mul_f32_e32 v0, v0, v1
1938; VI-NEXT:    v_add_f32_e32 v0, v0, v2
1939; VI-NEXT:    s_setpc_b64 s[30:31]
1940;
1941; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1942; SDAG-CI:       ; %bb.0:
1943; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1944; SDAG-CI-NEXT:    v_mul_f32_e32 v0, v0, v1
1945; SDAG-CI-NEXT:    v_add_f32_e32 v0, v0, v2
1946; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
1947;
1948; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
1949; GISEL-CI:       ; %bb.0:
1950; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1951; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
1952; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
1953; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
1954; GISEL-CI-NEXT:    v_mul_f32_e32 v0, v0, v1
1955; GISEL-CI-NEXT:    v_add_f32_e32 v0, v0, v2
1956; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
1957  %src0.ext = fpext half %src0 to float
1958  %src1.ext = fpext half %src1 to float
1959  %src2.ext = fpext half %src2 to float
1960  %mul = fmul float %src0.ext, %src1.ext
1961  %result = fadd float %mul, %src2.ext
1962  ret float %result
1963}
1964
; Two half operands are extended to float, multiplied with a plain fmul and then
; added to an f32 operand with a plain fadd (neither instruction carries the
; contract flag). The function uses attribute group #1.
; In every check block below the multiply and add stay separate (v_mul_f32
; followed by v_add_f32); no mix or mad instruction is expected.
1965define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 {
1966; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1967; GFX1100:       ; %bb.0:
1968; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1969; GFX1100-NEXT:    v_cvt_f32_f16_e32 v0, v0
1970; GFX1100-NEXT:    v_cvt_f32_f16_e32 v1, v1
1971; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1972; GFX1100-NEXT:    v_mul_f32_e32 v0, v0, v1
1973; GFX1100-NEXT:    v_add_f32_e32 v0, v0, v2
1974; GFX1100-NEXT:    s_setpc_b64 s[30:31]
1975;
1976; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1977; GFX900:       ; %bb.0:
1978; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1979; GFX900-NEXT:    v_cvt_f32_f16_e32 v0, v0
1980; GFX900-NEXT:    v_cvt_f32_f16_e32 v1, v1
1981; GFX900-NEXT:    v_mul_f32_e32 v0, v0, v1
1982; GFX900-NEXT:    v_add_f32_e32 v0, v0, v2
1983; GFX900-NEXT:    s_setpc_b64 s[30:31]
1984;
1985; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1986; GFX906:       ; %bb.0:
1987; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1988; GFX906-NEXT:    v_cvt_f32_f16_e32 v0, v0
1989; GFX906-NEXT:    v_cvt_f32_f16_e32 v1, v1
1990; GFX906-NEXT:    v_mul_f32_e32 v0, v0, v1
1991; GFX906-NEXT:    v_add_f32_e32 v0, v0, v2
1992; GFX906-NEXT:    s_setpc_b64 s[30:31]
1993;
1994; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
1995; GFX9GEN:       ; %bb.0:
1996; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1997; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
1998; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
1999; GFX9GEN-NEXT:    v_mul_f32_e32 v0, v0, v1
2000; GFX9GEN-NEXT:    v_add_f32_e32 v0, v0, v2
2001; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2002;
2003; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
2004; VI:       ; %bb.0:
2005; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2006; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2007; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2008; VI-NEXT:    v_mul_f32_e32 v0, v0, v1
2009; VI-NEXT:    v_add_f32_e32 v0, v0, v2
2010; VI-NEXT:    s_setpc_b64 s[30:31]
2011;
2012; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
2013; SDAG-CI:       ; %bb.0:
2014; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2015; SDAG-CI-NEXT:    v_mul_f32_e32 v0, v0, v1
2016; SDAG-CI-NEXT:    v_add_f32_e32 v0, v0, v2
2017; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2018;
2019; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
2020; GISEL-CI:       ; %bb.0:
2021; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2022; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2023; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2024; GISEL-CI-NEXT:    v_mul_f32_e32 v0, v0, v1
2025; GISEL-CI-NEXT:    v_add_f32_e32 v0, v0, v2
2026; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Only %src0 and %src1 need extending; %src2 is already float.
2027  %src0.ext = fpext half %src0 to float
2028  %src1.ext = fpext half %src1 to float
2029  %mul = fmul float %src0.ext, %src1.ext
2030  %result = fadd float %mul, %src2
2031  ret float %result
2032}
2033
; Three half operands are extended to float and combined with an fmul and an
; fadd that both carry the contract flag. The function uses attribute group #0.
; Per the check blocks below, the gfx1100 and gfx906 runs select a single
; v_fma_mix_f32 and the gfx900 run a single v_mad_mix_f32, all with
; op_sel_hi:[1,1,1]. The gfx9-generic, fiji and GlobalISel hawaii runs convert
; each operand with v_cvt_f32_f16 and then use v_mac_f32. The SelectionDAG
; hawaii run shows a lone v_mad_f32 with no conversions.
2034define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 {
2035; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2036; GFX1100:       ; %bb.0:
2037; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2038; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
2039; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2040;
2041; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2042; GFX900:       ; %bb.0:
2043; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
2045; GFX900-NEXT:    s_setpc_b64 s[30:31]
2046;
2047; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2048; GFX906:       ; %bb.0:
2049; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2050; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
2051; GFX906-NEXT:    s_setpc_b64 s[30:31]
2052;
2053; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2054; GFX9GEN:       ; %bb.0:
2055; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2056; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v3, v0
2057; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2058; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2059; GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2060; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2061;
2062; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2063; VI:       ; %bb.0:
2064; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2065; VI-NEXT:    v_cvt_f32_f16_e32 v3, v0
2066; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2067; VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2068; VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2069; VI-NEXT:    s_setpc_b64 s[30:31]
2070;
2071; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2072; SDAG-CI:       ; %bb.0:
2073; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2074; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2075; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2076;
2077; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
2078; GISEL-CI:       ; %bb.0:
2079; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2080; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
2081; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2082; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2083; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
2084; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; The contract flag appears on both the multiply and the add.
2085  %src0.ext = fpext half %src0 to float
2086  %src1.ext = fpext half %src1 to float
2087  %src2.ext = fpext half %src2 to float
2088  %mul = fmul contract float %src0.ext, %src1.ext
2089  %result = fadd contract float %mul, %src2.ext
2090  ret float %result
2091}
2092
; Two half operands are extended to float, multiplied with a contract fmul and
; added to an f32 operand with a contract fadd. The function uses attribute
; group #0.
; Per the check blocks below, the gfx1100 and gfx906 runs select v_fma_mix_f32
; and the gfx900 run v_mad_mix_f32, each with op_sel_hi:[1,1,0]. The
; gfx9-generic, fiji and GlobalISel hawaii runs convert the two halves and use
; v_mad_f32. The SelectionDAG hawaii run shows v_mad_f32 with no conversions.
2093define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 {
2094; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2095; GFX1100:       ; %bb.0:
2096; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2097; GFX1100-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
2098; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2099;
2100; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2101; GFX900:       ; %bb.0:
2102; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2103; GFX900-NEXT:    v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
2104; GFX900-NEXT:    s_setpc_b64 s[30:31]
2105;
2106; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2107; GFX906:       ; %bb.0:
2108; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2109; GFX906-NEXT:    v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
2110; GFX906-NEXT:    s_setpc_b64 s[30:31]
2111;
2112; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2113; GFX9GEN:       ; %bb.0:
2114; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2115; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
2116; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2117; GFX9GEN-NEXT:    v_mad_f32 v0, v0, v1, v2
2118; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2119;
2120; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2121; VI:       ; %bb.0:
2122; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2123; VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2124; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2125; VI-NEXT:    v_mad_f32 v0, v0, v1, v2
2126; VI-NEXT:    s_setpc_b64 s[30:31]
2127;
2128; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2129; SDAG-CI:       ; %bb.0:
2130; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2131; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2132; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2133;
2134; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
2135; GISEL-CI:       ; %bb.0:
2136; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2137; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2138; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2139; GISEL-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2140; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; %src2 is used directly as the f32 addend; only the multiplicands are extended.
2141  %src0.ext = fpext half %src0 to float
2142  %src1.ext = fpext half %src1 to float
2143  %mul = fmul contract float %src0.ext, %src1.ext
2144  %result = fadd contract float %mul, %src2
2145  ret float %result
2146}
2147
; src0 is element 0 of the i32 argument reinterpreted as <2 x half>. It is
; negated while still a half, then extended to float and fed to llvm.fmuladd
; together with the extended %src1 and %src2.
; Per the check blocks below, the gfx1100, gfx900 and gfx906 runs fold the
; negation into the mix instruction as a -v0 source modifier. The SelectionDAG
; runs for gfx9-generic, fiji and hawaii place the negation on the v_mad_f32
; operand, while the GlobalISel runs for the same CPUs place it on the
; v_cvt_f32_f16_e64 source and then use v_mac_f32.
2148define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2149; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2150; GFX1100:       ; %bb.0:
2151; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2152; GFX1100-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
2153; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2154;
2155; GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2156; GFX900:       ; %bb.0:
2157; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2158; GFX900-NEXT:    v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
2159; GFX900-NEXT:    s_setpc_b64 s[30:31]
2160;
2161; GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2162; GFX906:       ; %bb.0:
2163; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2164; GFX906-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
2165; GFX906-NEXT:    s_setpc_b64 s[30:31]
2166;
2167; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2168; SDAG-GFX9GEN:       ; %bb.0:
2169; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2170; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v0
2171; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2172; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
2173; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
2174; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2175;
2176; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2177; SDAG-VI:       ; %bb.0:
2178; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2179; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2180; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2181; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2182; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
2183; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
2184;
2185; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2186; SDAG-CI:       ; %bb.0:
2187; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2188; SDAG-CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
2189; SDAG-CI-NEXT:    v_mad_f32 v0, -v0, v1, v2
2190; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2191;
2192; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2193; GISEL-GFX9GEN:       ; %bb.0:
2194; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2195; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e64 v3, -v0
2196; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2197; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2198; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2199; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2200;
2201; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2202; GISEL-VI:       ; %bb.0:
2203; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2204; GISEL-VI-NEXT:    v_cvt_f32_f16_e64 v3, -v0
2205; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2206; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2207; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2208; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
2209;
2210; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
2211; GISEL-CI:       ; %bb.0:
2212; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2213; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v3, -v0
2214; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2215; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2216; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
2217; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Element 0 of the bitcast vector is the half being negated.
2218  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2219  %src0 = extractelement <2 x half> %src0.arg.bc, i32 0
2220  %src0.neg = fneg half %src0
2221  %src0.ext = fpext half %src0.neg to float
2222  %src1.ext = fpext half %src1 to float
2223  %src2.ext = fpext half %src2 to float
; The next line is a disabled float-typed fneg of the extended value; nothing
; references %src0.ext.neg. NOTE(review) - presumably kept to contrast with the
; half-typed fneg above, which is what this test exercises; confirm intent.
2224;  %src0.ext.neg = fneg float %src0.ext
2225  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
2226  ret float %result
2227}
2228
2229; Make sure we don't fold pre-cvt fneg if we already have a fabs
2230
; src0 is element 1 of the i32 argument reinterpreted as <2 x half>. It is
; negated as a half, extended to float, and then llvm.fabs.f32 is applied to
; the extended value before the llvm.fmuladd call.
; Per the check blocks below, the gfx1100, gfx900 and gfx906 runs emit the
; negation as an explicit xor with 0x8000 on the extracted high half and use
; only |v0| as the source modifier on the mix instruction. The gfx9-generic,
; fiji and GlobalISel hawaii runs put -v0 on the conversion and |v0| on
; v_mad_f32. The SelectionDAG hawaii run shows only |v0| on the conversion.
2231define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2232; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2233; GFX1100:       ; %bb.0:
2234; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2235; GFX1100-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2236; GFX1100-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
2237; GFX1100-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
2238; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
2239; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2240;
2241; GFX900-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2242; GFX900:       ; %bb.0:
2243; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2244; GFX900-NEXT:    s_mov_b32 s4, 0x8000
2245; GFX900-NEXT:    v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2246; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
2247; GFX900-NEXT:    s_setpc_b64 s[30:31]
2248;
2249; GFX906-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2250; GFX906:       ; %bb.0:
2251; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2252; GFX906-NEXT:    s_mov_b32 s4, 0x8000
2253; GFX906-NEXT:    v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
2254; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
2255; GFX906-NEXT:    s_setpc_b64 s[30:31]
2256;
2257; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2258; GFX9GEN:       ; %bb.0:
2259; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2260; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2261; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2262; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
2263; GFX9GEN-NEXT:    v_mad_f32 v0, |v0|, v1, v2
2264; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2265;
2266; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2267; VI:       ; %bb.0:
2268; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2269; VI-NEXT:    v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2270; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2271; VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2272; VI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
2273; VI-NEXT:    s_setpc_b64 s[30:31]
2274;
2275; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2276; SDAG-CI:       ; %bb.0:
2277; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2278; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2279; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
2280; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2281; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2282;
2283; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
2284; GISEL-CI:       ; %bb.0:
2285; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2286; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2287; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
2288; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2289; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2290; GISEL-CI-NEXT:    v_mad_f32 v0, |v0|, v1, v2
2291; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; fneg is applied to the half (before fpext); fabs is applied to the float
; (after fpext).
2292  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2293  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
2294  %src0.neg = fneg half %src0
2295  %src0.ext = fpext half %src0.neg to float
2296  %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
2297  %src1.ext = fpext half %src1 to float
2298  %src2.ext = fpext half %src2 to float
2299  %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext)
2300  ret float %result
2301}
2302
; src0 is element 1 of the i32 argument reinterpreted as <2 x half>.
; llvm.fabs.f16 is applied to it while it is still a half, and the result is
; extended to float and passed to llvm.fmuladd.
; Per the check blocks below, the gfx1100, gfx900 and gfx906 runs fold both the
; high-half selection (op_sel:[1,0,0]) and the fabs (|v0|) into the mix
; instruction. The other runs put |v0| on the f16-to-f32 conversion instead.
2303define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2304; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2305; GFX1100:       ; %bb.0:
2306; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2307; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2308; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2309;
2310; GFX900-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2311; GFX900:       ; %bb.0:
2312; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2313; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2314; GFX900-NEXT:    s_setpc_b64 s[30:31]
2315;
2316; GFX906-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2317; GFX906:       ; %bb.0:
2318; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2319; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2320; GFX906-NEXT:    s_setpc_b64 s[30:31]
2321;
2322; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2323; GFX9GEN:       ; %bb.0:
2324; GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2325; GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2326; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2327; GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2328; GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2329; GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2330;
2331; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2332; VI:       ; %bb.0:
2333; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2334; VI-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2335; VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2336; VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2337; VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2338; VI-NEXT:    s_setpc_b64 s[30:31]
2339;
2340; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2341; SDAG-CI:       ; %bb.0:
2342; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2343; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2344; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
2345; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2346; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2347;
2348; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
2349; GISEL-CI:       ; %bb.0:
2350; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2351; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2352; GISEL-CI-NEXT:    v_cvt_f32_f16_e64 v3, |v0|
2353; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2354; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2355; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
2356; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; The fabs here is the scalar half intrinsic, applied after the extract and
; before the fpext.
2357  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2358  %src0 = extractelement <2 x half> %src0.arg.bc, i32 1
2359  %src0.abs = call half @llvm.fabs.f16(half %src0)
2360  %src0.ext = fpext half %src0.abs to float
2361  %src1.ext = fpext half %src1 to float
2362  %src2.ext = fpext half %src2 to float
2363  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
2364  ret float %result
2365}
2366
; The i32 argument is reinterpreted as <2 x half> and the whole vector is
; negated before element 1 is extracted, extended to float and passed to
; llvm.fmuladd.
; Per the check blocks below, the gfx1100, gfx900 and gfx906 runs fold the
; negation (-v0) and the high-half selection (op_sel:[1,0,0]) into the mix
; instruction. The SelectionDAG runs for the other CPUs apply the negation as a
; source modifier on v_mad_f32 or on the conversion. The GlobalISel runs for
; those CPUs instead emit a v_xor_b32 with 0x80008000 on the packed register.
2367define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2368; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2369; GFX1100:       ; %bb.0:
2370; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2371; GFX1100-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2372; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2373;
2374; GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2375; GFX900:       ; %bb.0:
2376; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2377; GFX900-NEXT:    v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2378; GFX900-NEXT:    s_setpc_b64 s[30:31]
2379;
2380; GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2381; GFX906:       ; %bb.0:
2382; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2383; GFX906-NEXT:    v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2384; GFX906-NEXT:    s_setpc_b64 s[30:31]
2385;
2386; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2387; SDAG-GFX9GEN:       ; %bb.0:
2388; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2389; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2390; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2391; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
2392; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
2393; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2394;
2395; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2396; SDAG-VI:       ; %bb.0:
2397; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2398; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2399; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2400; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2401; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
2402; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
2403;
2404; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2405; SDAG-CI:       ; %bb.0:
2406; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2407; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2408; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, -v0
2409; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2410; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2411;
2412; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2413; GISEL-GFX9GEN:       ; %bb.0:
2414; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2415; GISEL-GFX9GEN-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
2416; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2417; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2418; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2419; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2420; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2421;
2422; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2423; GISEL-VI:       ; %bb.0:
2424; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2425; GISEL-VI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
2426; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2427; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2428; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2429; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2430; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
2431;
2432; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
2433; GISEL-CI:       ; %bb.0:
2434; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2435; GISEL-CI-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
2436; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2437; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
2438; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2439; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2440; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
2441; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; The fneg operates on the full <2 x half> value; the extract comes afterwards.
2442  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2443  %fneg = fneg <2 x half> %src0.arg.bc
2444  %src0 = extractelement <2 x half> %fneg, i32 1
2445  %src0.ext = fpext half %src0 to float
2446  %src1.ext = fpext half %src1 to float
2447  %src2.ext = fpext half %src2 to float
2448  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
2449  ret float %result
2450}
2451
; The i32 argument is reinterpreted as <2 x half> and llvm.fabs.v2f16 is applied
; to the whole vector before element 1 is extracted, extended to float and
; passed to llvm.fmuladd.
; Per the check blocks below, the gfx1100, gfx900 and gfx906 runs fold the fabs
; (|v0|) and the high-half selection (op_sel:[1,0,0]) into the mix instruction.
; The SelectionDAG runs for the other CPUs put |v0| on the conversion. The
; GlobalISel runs for those CPUs instead emit a v_and_b32 with 0x7fff7fff on
; the packed register.
2452define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2453; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2454; GFX1100:       ; %bb.0:
2455; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2456; GFX1100-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2457; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2458;
2459; GFX900-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2460; GFX900:       ; %bb.0:
2461; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2462; GFX900-NEXT:    v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2463; GFX900-NEXT:    s_setpc_b64 s[30:31]
2464;
2465; GFX906-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2466; GFX906:       ; %bb.0:
2467; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2468; GFX906-NEXT:    v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2469; GFX906-NEXT:    s_setpc_b64 s[30:31]
2470;
2471; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2472; SDAG-GFX9GEN:       ; %bb.0:
2473; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2474; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2475; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2476; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2477; SDAG-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2478; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2479;
2480; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2481; SDAG-VI:       ; %bb.0:
2482; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2483; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2484; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2485; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2486; SDAG-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2487; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
2488;
2489; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2490; SDAG-CI:       ; %bb.0:
2491; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2492; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2493; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, |v0|
2494; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2495; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2496;
2497; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2498; GISEL-GFX9GEN:       ; %bb.0:
2499; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2500; GISEL-GFX9GEN-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2501; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2502; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2503; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2504; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2505; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2506;
2507; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2508; GISEL-VI:       ; %bb.0:
2509; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2510; GISEL-VI-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2511; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2512; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2513; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2514; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2515; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
2516;
2517; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
2518; GISEL-CI:       ; %bb.0:
2519; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2520; GISEL-CI-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
2521; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2522; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
2523; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2524; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2525; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
2526; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; The fabs operates on the full <2 x half> value; the extract comes afterwards.
2527  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2528  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2529  %src0 = extractelement <2 x half> %fabs, i32 1
2530  %src0.ext = fpext half %src0 to float
2531  %src1.ext = fpext half %src1 to float
2532  %src2.ext = fpext half %src2 to float
2533  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
2534  ret float %result
2535}
2536
; The i32 argument is reinterpreted as <2 x half>; llvm.fabs.v2f16 and then a
; vector fneg are applied before element 1 is extracted, extended to float and
; passed to llvm.fmuladd.
; Per the check blocks below, the gfx1100, gfx900 and gfx906 runs fold the
; combined modifier (-|v0|) and the high-half selection (op_sel:[1,0,0]) into
; the mix instruction. The SelectionDAG runs for the other CPUs split or fold
; the modifiers onto the conversion and v_mad_f32. The GlobalISel runs for
; those CPUs instead emit a v_or_b32 with 0x80008000 on the packed register.
2537define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 {
2538; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2539; GFX1100:       ; %bb.0:
2540; GFX1100-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2541; GFX1100-NEXT:    v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2542; GFX1100-NEXT:    s_setpc_b64 s[30:31]
2543;
2544; GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2545; GFX900:       ; %bb.0:
2546; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2547; GFX900-NEXT:    v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2548; GFX900-NEXT:    s_setpc_b64 s[30:31]
2549;
2550; GFX906-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2551; GFX906:       ; %bb.0:
2552; GFX906-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2553; GFX906-NEXT:    v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
2554; GFX906-NEXT:    s_setpc_b64 s[30:31]
2555;
2556; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2557; SDAG-GFX9GEN:       ; %bb.0:
2558; SDAG-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2559; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2560; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2561; SDAG-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v2, v2
2562; SDAG-GFX9GEN-NEXT:    v_mad_f32 v0, -v0, v1, v2
2563; SDAG-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2564;
2565; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2566; SDAG-VI:       ; %bb.0:
2567; SDAG-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2568; SDAG-VI-NEXT:    v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2569; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2570; SDAG-VI-NEXT:    v_cvt_f32_f16_e32 v2, v2
2571; SDAG-VI-NEXT:    v_mad_f32 v0, -v0, v1, v2
2572; SDAG-VI-NEXT:    s_setpc_b64 s[30:31]
2573;
2574; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2575; SDAG-CI:       ; %bb.0:
2576; SDAG-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2577; SDAG-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2578; SDAG-CI-NEXT:    v_cvt_f32_f16_e64 v0, -|v0|
2579; SDAG-CI-NEXT:    v_mad_f32 v0, v0, v1, v2
2580; SDAG-CI-NEXT:    s_setpc_b64 s[30:31]
2581;
2582; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2583; GISEL-GFX9GEN:       ; %bb.0:
2584; GISEL-GFX9GEN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2585; GISEL-GFX9GEN-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
2586; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2587; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v1, v1
2588; GISEL-GFX9GEN-NEXT:    v_cvt_f32_f16_e32 v0, v2
2589; GISEL-GFX9GEN-NEXT:    v_mac_f32_e32 v0, v3, v1
2590; GISEL-GFX9GEN-NEXT:    s_setpc_b64 s[30:31]
2591;
2592; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2593; GISEL-VI:       ; %bb.0:
2594; GISEL-VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2595; GISEL-VI-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
2596; GISEL-VI-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
2597; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2598; GISEL-VI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2599; GISEL-VI-NEXT:    v_mac_f32_e32 v0, v3, v1
2600; GISEL-VI-NEXT:    s_setpc_b64 s[30:31]
2601;
2602; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
2603; GISEL-CI:       ; %bb.0:
2604; GISEL-CI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2605; GISEL-CI-NEXT:    v_or_b32_e32 v0, 0x80008000, v0
2606; GISEL-CI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
2607; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v3, v0
2608; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
2609; GISEL-CI-NEXT:    v_cvt_f32_f16_e32 v0, v2
2610; GISEL-CI-NEXT:    v_mac_f32_e32 v0, v3, v1
2611; GISEL-CI-NEXT:    s_setpc_b64 s[30:31]
; Both fabs and fneg operate on the full <2 x half> value, in that order,
; before the extract.
2612  %src0.arg.bc = bitcast i32 %src0.arg to <2 x half>
2613  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc)
2614  %fneg.fabs = fneg <2 x half> %fabs
2615  %src0 = extractelement <2 x half> %fneg.fabs, i32 1
2616  %src0.ext = fpext half %src0 to float
2617  %src1.ext = fpext half %src1 to float
2618  %src2.ext = fpext half %src2 to float
2619  %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext)
2620  ret float %result
2621}
2622
; Intrinsic declarations; every one of them uses attribute group #2.
2623declare half @llvm.fabs.f16(half) #2
2624declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
2625declare float @llvm.fabs.f32(float) #2
2626declare float @llvm.minnum.f32(float, float) #2
2627declare float @llvm.maxnum.f32(float, float) #2
2628declare float @llvm.fmuladd.f32(float, float, float) #2
2629declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
2630
; Attribute groups referenced by the functions in this file.
;   #0 - nounwind with the f32 denormal mode set to "preserve-sign,preserve-sign";
;        the functions named *_flush_* and the source-modifier tests use it.
;   #1 - nounwind with the f32 denormal mode set to "ieee,ieee"; the functions
;        named *_denormals_* use it.
;   #2 - nounwind readnone speculatable, for the intrinsic declarations.
2631attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
2632attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
2633attributes #2 = { nounwind readnone speculatable }
2634