xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s
7
; Baseline case: scalar f32 llvm.fma with no operand modifiers.
; Every check prefix expects a single v_fma_f32 reading v0, v1, v2 and
; writing the result to v0.
8define float @v_fma_f32(float %x, float %y, float %z) {
9; GFX6-LABEL: v_fma_f32:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
13; GFX6-NEXT:    s_setpc_b64 s[30:31]
14;
15; GFX8-LABEL: v_fma_f32:
16; GFX8:       ; %bb.0:
17; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX8-NEXT:    v_fma_f32 v0, v0, v1, v2
19; GFX8-NEXT:    s_setpc_b64 s[30:31]
20;
21; GFX9-LABEL: v_fma_f32:
22; GFX9:       ; %bb.0:
23; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24; GFX9-NEXT:    v_fma_f32 v0, v0, v1, v2
25; GFX9-NEXT:    s_setpc_b64 s[30:31]
26;
27; GFX10-LABEL: v_fma_f32:
28; GFX10:       ; %bb.0:
29; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30; GFX10-NEXT:    v_fma_f32 v0, v0, v1, v2
31; GFX10-NEXT:    s_setpc_b64 s[30:31]
32;
33; GFX11-LABEL: v_fma_f32:
34; GFX11:       ; %bb.0:
35; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX11-NEXT:    v_fma_f32 v0, v0, v1, v2
37; GFX11-NEXT:    s_setpc_b64 s[30:31]
38  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
39  ret float %fma
40}
41
; <2 x float> llvm.fma. Every check prefix expects the vector to be handled
; as two independent v_fma_f32 instructions, one per element: element 0 uses
; v0, v2, v4 and element 1 uses v1, v3, v5.
42define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
43; GFX6-LABEL: v_fma_v2f32:
44; GFX6:       ; %bb.0:
45; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX6-NEXT:    v_fma_f32 v0, v0, v2, v4
47; GFX6-NEXT:    v_fma_f32 v1, v1, v3, v5
48; GFX6-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX8-LABEL: v_fma_v2f32:
51; GFX8:       ; %bb.0:
52; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX8-NEXT:    v_fma_f32 v0, v0, v2, v4
54; GFX8-NEXT:    v_fma_f32 v1, v1, v3, v5
55; GFX8-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX9-LABEL: v_fma_v2f32:
58; GFX9:       ; %bb.0:
59; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX9-NEXT:    v_fma_f32 v0, v0, v2, v4
61; GFX9-NEXT:    v_fma_f32 v1, v1, v3, v5
62; GFX9-NEXT:    s_setpc_b64 s[30:31]
63;
64; GFX10-LABEL: v_fma_v2f32:
65; GFX10:       ; %bb.0:
66; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
67; GFX10-NEXT:    v_fma_f32 v0, v0, v2, v4
68; GFX10-NEXT:    v_fma_f32 v1, v1, v3, v5
69; GFX10-NEXT:    s_setpc_b64 s[30:31]
70;
71; GFX11-LABEL: v_fma_v2f32:
72; GFX11:       ; %bb.0:
73; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74; GFX11-NEXT:    v_fma_f32 v0, v0, v2, v4
75; GFX11-NEXT:    v_fma_f32 v1, v1, v3, v5
76; GFX11-NEXT:    s_setpc_b64 s[30:31]
77  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
78  ret <2 x float> %fma
79}
80
; Scalar f16 llvm.fma with no operand modifiers.
; The GFX6 checks (tahiti run) expect all three operands to be converted to
; f32, combined with v_fma_f32, and the result converted back to f16.
; The checks for the other four prefixes expect a single v_fma_f16.
81define half @v_fma_f16(half %x, half %y, half %z) {
82; GFX6-LABEL: v_fma_f16:
83; GFX6:       ; %bb.0:
84; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
86; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
87; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
88; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
89; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
90; GFX6-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX8-LABEL: v_fma_f16:
93; GFX8:       ; %bb.0:
94; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
96; GFX8-NEXT:    s_setpc_b64 s[30:31]
97;
98; GFX9-LABEL: v_fma_f16:
99; GFX9:       ; %bb.0:
100; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101; GFX9-NEXT:    v_fma_f16 v0, v0, v1, v2
102; GFX9-NEXT:    s_setpc_b64 s[30:31]
103;
104; GFX10-LABEL: v_fma_f16:
105; GFX10:       ; %bb.0:
106; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
107; GFX10-NEXT:    v_fma_f16 v0, v0, v1, v2
108; GFX10-NEXT:    s_setpc_b64 s[30:31]
109;
110; GFX11-LABEL: v_fma_f16:
111; GFX11:       ; %bb.0:
112; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; GFX11-NEXT:    v_fma_f16 v0, v0, v1, v2
114; GFX11-NEXT:    s_setpc_b64 s[30:31]
115  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
116  ret half %fma
117}
118
; f16 llvm.fma whose first multiplicand is fneg(%x).
; No separate negate instruction is expected for any prefix:
;  - GFX6 checks: the negation appears as a source modifier (-v0) on the
;    f16-to-f32 conversion of the first operand (e64 encoding), followed by
;    the same f32 fma and convert-back sequence as the plain f16 case.
;  - Other prefixes: the negation appears as -v0 on v_fma_f16 itself.
119define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
120; GFX6-LABEL: v_fma_f16_fneg_lhs:
121; GFX6:       ; %bb.0:
122; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123; GFX6-NEXT:    v_cvt_f32_f16_e64 v0, -v0
124; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
125; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
126; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
127; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
128; GFX6-NEXT:    s_setpc_b64 s[30:31]
129;
130; GFX8-LABEL: v_fma_f16_fneg_lhs:
131; GFX8:       ; %bb.0:
132; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
133; GFX8-NEXT:    v_fma_f16 v0, -v0, v1, v2
134; GFX8-NEXT:    s_setpc_b64 s[30:31]
135;
136; GFX9-LABEL: v_fma_f16_fneg_lhs:
137; GFX9:       ; %bb.0:
138; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
139; GFX9-NEXT:    v_fma_f16 v0, -v0, v1, v2
140; GFX9-NEXT:    s_setpc_b64 s[30:31]
141;
142; GFX10-LABEL: v_fma_f16_fneg_lhs:
143; GFX10:       ; %bb.0:
144; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
145; GFX10-NEXT:    v_fma_f16 v0, -v0, v1, v2
146; GFX10-NEXT:    s_setpc_b64 s[30:31]
147;
148; GFX11-LABEL: v_fma_f16_fneg_lhs:
149; GFX11:       ; %bb.0:
150; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GFX11-NEXT:    v_fma_f16 v0, -v0, v1, v2
152; GFX11-NEXT:    s_setpc_b64 s[30:31]
153  %neg.x = fneg half %x
154  %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
155  ret half %fma
156}
157
; f16 llvm.fma whose second multiplicand is fneg(%y).
; No separate negate instruction is expected for any prefix:
;  - GFX6 checks: the negation appears as a source modifier (-v1) on the
;    f16-to-f32 conversion of the second operand (e64 encoding).
;  - Other prefixes: the negation appears as -v1 on v_fma_f16 itself.
158define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
159; GFX6-LABEL: v_fma_f16_fneg_rhs:
160; GFX6:       ; %bb.0:
161; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
163; GFX6-NEXT:    v_cvt_f32_f16_e64 v1, -v1
164; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
165; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
166; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
167; GFX6-NEXT:    s_setpc_b64 s[30:31]
168;
169; GFX8-LABEL: v_fma_f16_fneg_rhs:
170; GFX8:       ; %bb.0:
171; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; GFX8-NEXT:    v_fma_f16 v0, v0, -v1, v2
173; GFX8-NEXT:    s_setpc_b64 s[30:31]
174;
175; GFX9-LABEL: v_fma_f16_fneg_rhs:
176; GFX9:       ; %bb.0:
177; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; GFX9-NEXT:    v_fma_f16 v0, v0, -v1, v2
179; GFX9-NEXT:    s_setpc_b64 s[30:31]
180;
181; GFX10-LABEL: v_fma_f16_fneg_rhs:
182; GFX10:       ; %bb.0:
183; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GFX10-NEXT:    v_fma_f16 v0, v0, -v1, v2
185; GFX10-NEXT:    s_setpc_b64 s[30:31]
186;
187; GFX11-LABEL: v_fma_f16_fneg_rhs:
188; GFX11:       ; %bb.0:
189; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190; GFX11-NEXT:    v_fma_f16 v0, v0, -v1, v2
191; GFX11-NEXT:    s_setpc_b64 s[30:31]
192  %neg.y = fneg half %y
193  %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
194  ret half %fma
195}
196
; f16 llvm.fma whose addend is fneg(%z).
; No separate negate instruction is expected for any prefix:
;  - GFX6 checks: the negation appears as a source modifier (-v2) on the
;    f16-to-f32 conversion of the third operand (e64 encoding).
;  - Other prefixes: the negation appears as -v2 on v_fma_f16 itself.
197define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
198; GFX6-LABEL: v_fma_f16_fneg_add:
199; GFX6:       ; %bb.0:
200; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
201; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
202; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
203; GFX6-NEXT:    v_cvt_f32_f16_e64 v2, -v2
204; GFX6-NEXT:    v_fma_f32 v0, v0, v1, v2
205; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
206; GFX6-NEXT:    s_setpc_b64 s[30:31]
207;
208; GFX8-LABEL: v_fma_f16_fneg_add:
209; GFX8:       ; %bb.0:
210; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
211; GFX8-NEXT:    v_fma_f16 v0, v0, v1, -v2
212; GFX8-NEXT:    s_setpc_b64 s[30:31]
213;
214; GFX9-LABEL: v_fma_f16_fneg_add:
215; GFX9:       ; %bb.0:
216; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
217; GFX9-NEXT:    v_fma_f16 v0, v0, v1, -v2
218; GFX9-NEXT:    s_setpc_b64 s[30:31]
219;
220; GFX10-LABEL: v_fma_f16_fneg_add:
221; GFX10:       ; %bb.0:
222; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223; GFX10-NEXT:    v_fma_f16 v0, v0, v1, -v2
224; GFX10-NEXT:    s_setpc_b64 s[30:31]
225;
226; GFX11-LABEL: v_fma_f16_fneg_add:
227; GFX11:       ; %bb.0:
228; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229; GFX11-NEXT:    v_fma_f16 v0, v0, v1, -v2
230; GFX11-NEXT:    s_setpc_b64 s[30:31]
231  %neg.z = fneg half %z
232  %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
233  ret half %fma
234}
235
; <2 x half> llvm.fma with no operand modifiers. Three expected shapes:
;  - GFX6 checks: each element lives in its own register (v0..v5); every
;    operand is converted to f32, two v_fma_f32 are issued, and each result
;    is converted back to f16.
;  - GFX8 checks: the high halves of the packed operands are extracted with a
;    16-bit right shift, two v_fma_f16 are issued, and the high result is
;    shifted left by 16 and OR-ed into v0.
;  - GFX9, GFX10 and GFX11 checks: a single packed v_pk_fma_f16.
236define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
237; GFX6-LABEL: v_fma_v2f16:
238; GFX6:       ; %bb.0:
239; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
241; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
242; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
243; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
244; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
245; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
246; GFX6-NEXT:    v_fma_f32 v0, v0, v2, v4
247; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
248; GFX6-NEXT:    v_fma_f32 v1, v1, v3, v5
249; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
250; GFX6-NEXT:    s_setpc_b64 s[30:31]
251;
252; GFX8-LABEL: v_fma_v2f16:
253; GFX8:       ; %bb.0:
254; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
256; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
257; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
258; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
259; GFX8-NEXT:    v_fma_f16 v1, v3, v4, v5
260; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
261; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
262; GFX8-NEXT:    s_setpc_b64 s[30:31]
263;
264; GFX9-LABEL: v_fma_v2f16:
265; GFX9:       ; %bb.0:
266; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
268; GFX9-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX10-LABEL: v_fma_v2f16:
271; GFX10:       ; %bb.0:
272; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
274; GFX10-NEXT:    s_setpc_b64 s[30:31]
275;
276; GFX11-LABEL: v_fma_v2f16:
277; GFX11:       ; %bb.0:
278; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
280; GFX11-NEXT:    s_setpc_b64 s[30:31]
281  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z)
282  ret <2 x half> %fma
283}
284
; <2 x half> llvm.fma whose first multiplicand is a vector fneg of %x.
;  - GFX6 checks: the two elements of %x (v0, v1) are packed into one
;    register, both sign bits are flipped with xor 0x80008000, and the value
;    is split again before the per-element f32 convert / fma / convert-back.
;  - GFX8 checks: packed %x in v0 is negated with xor 0x80008000, then the
;    same shift / two v_fma_f16 / repack sequence as the unmodified case.
;  - GFX9, GFX10 and GFX11 checks: the negation is expressed on v_pk_fma_f16
;    through neg_lo:[1,0,0] neg_hi:[1,0,0] (first source, both halves).
285define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
286; GFX6-LABEL: v_fma_v2f16_fneg_lhs:
287; GFX6:       ; %bb.0:
288; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
290; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
291; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
292; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
293; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
294; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
295; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
296; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
297; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
298; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
299; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
300; GFX6-NEXT:    v_fma_f32 v0, v0, v2, v4
301; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
302; GFX6-NEXT:    v_fma_f32 v1, v1, v3, v5
303; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
304; GFX6-NEXT:    s_setpc_b64 s[30:31]
305;
306; GFX8-LABEL: v_fma_v2f16_fneg_lhs:
307; GFX8:       ; %bb.0:
308; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
310; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
311; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
312; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
313; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
314; GFX8-NEXT:    v_fma_f16 v1, v3, v4, v5
315; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
316; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
317; GFX8-NEXT:    s_setpc_b64 s[30:31]
318;
319; GFX9-LABEL: v_fma_v2f16_fneg_lhs:
320; GFX9:       ; %bb.0:
321; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
322; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
323; GFX9-NEXT:    s_setpc_b64 s[30:31]
324;
325; GFX10-LABEL: v_fma_v2f16_fneg_lhs:
326; GFX10:       ; %bb.0:
327; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
329; GFX10-NEXT:    s_setpc_b64 s[30:31]
330;
331; GFX11-LABEL: v_fma_v2f16_fneg_lhs:
332; GFX11:       ; %bb.0:
333; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
334; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
335; GFX11-NEXT:    s_setpc_b64 s[30:31]
336  %x.fneg = fneg <2 x half> %x
337  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y, <2 x half> %z)
338  ret <2 x half> %fma
339}
340
; <2 x half> llvm.fma whose second multiplicand is a vector fneg of %y.
;  - GFX6 checks: the two elements of %y (v2, v3) are packed into one
;    register, both sign bits are flipped with xor 0x80008000, and the value
;    is split again before the per-element f32 convert / fma / convert-back.
;  - GFX8 checks: packed %y in v1 is negated with xor 0x80008000, then the
;    same shift / two v_fma_f16 / repack sequence as the unmodified case.
;  - GFX9, GFX10 and GFX11 checks: the negation is expressed on v_pk_fma_f16
;    through neg_lo:[0,1,0] neg_hi:[0,1,0] (second source, both halves).
341define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
342; GFX6-LABEL: v_fma_v2f16_fneg_rhs:
343; GFX6:       ; %bb.0:
344; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
346; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
347; GFX6-NEXT:    v_or_b32_e32 v2, v3, v2
348; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
349; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
350; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
351; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
352; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
353; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
354; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
355; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
356; GFX6-NEXT:    v_fma_f32 v0, v0, v2, v4
357; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
358; GFX6-NEXT:    v_fma_f32 v1, v1, v3, v5
359; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
360; GFX6-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX8-LABEL: v_fma_v2f16_fneg_rhs:
363; GFX8:       ; %bb.0:
364; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
366; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
367; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
368; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
369; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
370; GFX8-NEXT:    v_fma_f16 v1, v3, v4, v5
371; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
372; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
373; GFX8-NEXT:    s_setpc_b64 s[30:31]
374;
375; GFX9-LABEL: v_fma_v2f16_fneg_rhs:
376; GFX9:       ; %bb.0:
377; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
379; GFX9-NEXT:    s_setpc_b64 s[30:31]
380;
381; GFX10-LABEL: v_fma_v2f16_fneg_rhs:
382; GFX10:       ; %bb.0:
383; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
384; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
385; GFX10-NEXT:    s_setpc_b64 s[30:31]
386;
387; GFX11-LABEL: v_fma_v2f16_fneg_rhs:
388; GFX11:       ; %bb.0:
389; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
391; GFX11-NEXT:    s_setpc_b64 s[30:31]
392  %y.fneg = fneg <2 x half> %y
393  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y.fneg, <2 x half> %z)
394  ret <2 x half> %fma
395}
396
; <2 x half> llvm.fma where both multiplicands are negated (fneg %x and
; fneg %y); the addend %z is untouched.
; Since (-x) * (-y) == x * y, the expected output for every prefix contains
; no negation at all: no xor, no source modifiers, and no neg_lo / neg_hi.
; The instruction sequences checked here are the same ones checked for the
; unmodified <2 x half> fma.
397define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
398; GFX6-LABEL: v_fma_v2f16_fneg_lhs_rhs:
399; GFX6:       ; %bb.0:
400; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
401; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
402; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
403; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
404; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
405; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
406; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
407; GFX6-NEXT:    v_fma_f32 v0, v0, v2, v4
408; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
409; GFX6-NEXT:    v_fma_f32 v1, v1, v3, v5
410; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
411; GFX6-NEXT:    s_setpc_b64 s[30:31]
412;
413; GFX8-LABEL: v_fma_v2f16_fneg_lhs_rhs:
414; GFX8:       ; %bb.0:
415; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
417; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
418; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
419; GFX8-NEXT:    v_fma_f16 v0, v0, v1, v2
420; GFX8-NEXT:    v_fma_f16 v1, v3, v4, v5
421; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
422; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
423; GFX8-NEXT:    s_setpc_b64 s[30:31]
424;
425; GFX9-LABEL: v_fma_v2f16_fneg_lhs_rhs:
426; GFX9:       ; %bb.0:
427; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
429; GFX9-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX10-LABEL: v_fma_v2f16_fneg_lhs_rhs:
432; GFX10:       ; %bb.0:
433; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
435; GFX10-NEXT:    s_setpc_b64 s[30:31]
436;
437; GFX11-LABEL: v_fma_v2f16_fneg_lhs_rhs:
438; GFX11:       ; %bb.0:
439; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
440; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v1, v2
441; GFX11-NEXT:    s_setpc_b64 s[30:31]
442  %x.fneg = fneg <2 x half> %x
443  %y.fneg = fneg <2 x half> %y
444  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg, <2 x half> %z)
445  ret <2 x half> %fma
446}
447
; <3 x half> llvm.fma (odd element count).
;  - GFX6 checks: one register per element (x in v0-v2, y in v3-v5, z in
;    v6-v8); three f32 fmas with conversions in and out. Registers v3, v4 and
;    v6 are reused as temporaries once the first fma has consumed them.
;  - GFX8 checks: three v_fma_f16; only the first two results are repacked
;    into v0, while the third element is computed directly into v1.
;  - GFX9, GFX10 and GFX11 checks: two v_pk_fma_f16, one per 32-bit register.
448define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
449; GFX6-LABEL: v_fma_v3f16:
450; GFX6:       ; %bb.0:
451; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
453; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
454; GFX6-NEXT:    v_cvt_f32_f16_e32 v6, v6
455; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
456; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
457; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
458; GFX6-NEXT:    v_fma_f32 v0, v0, v3, v6
459; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v4
460; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v7
461; GFX6-NEXT:    v_cvt_f32_f16_e32 v6, v8
462; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
463; GFX6-NEXT:    v_fma_f32 v1, v1, v3, v4
464; GFX6-NEXT:    v_fma_f32 v2, v2, v5, v6
465; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
466; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
467; GFX6-NEXT:    s_setpc_b64 s[30:31]
468;
469; GFX8-LABEL: v_fma_v3f16:
470; GFX8:       ; %bb.0:
471; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
473; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v2
474; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
475; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
476; GFX8-NEXT:    v_fma_f16 v2, v6, v7, v8
477; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
478; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
479; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
480; GFX8-NEXT:    s_setpc_b64 s[30:31]
481;
482; GFX9-LABEL: v_fma_v3f16:
483; GFX9:       ; %bb.0:
484; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
485; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
486; GFX9-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
487; GFX9-NEXT:    s_setpc_b64 s[30:31]
488;
489; GFX10-LABEL: v_fma_v3f16:
490; GFX10:       ; %bb.0:
491; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
493; GFX10-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
494; GFX10-NEXT:    s_setpc_b64 s[30:31]
495;
496; GFX11-LABEL: v_fma_v3f16:
497; GFX11:       ; %bb.0:
498; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
499; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
500; GFX11-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
501; GFX11-NEXT:    s_setpc_b64 s[30:31]
502  %fma = call <3 x half> @llvm.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z)
503  ret <3 x half> %fma
504}
505
; <4 x half> llvm.fma.
;  - GFX6 checks: one register per element (x in v0-v3, y in v4-v7, z in
;    v8-v11); four f32 fmas with conversions in and out. Registers v4-v7 are
;    reused as temporaries once their original contents have been consumed.
;  - GFX8 checks: high halves are extracted with 16-bit right shifts, four
;    v_fma_f16 are issued, and the results are repacked pairwise into v0, v1.
;  - GFX9, GFX10 and GFX11 checks: two v_pk_fma_f16, one per 32-bit register.
506define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
507; GFX6-LABEL: v_fma_v4f16:
508; GFX6:       ; %bb.0:
509; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
511; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v4
512; GFX6-NEXT:    v_cvt_f32_f16_e32 v8, v8
513; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
514; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v5
515; GFX6-NEXT:    v_cvt_f32_f16_e32 v9, v9
516; GFX6-NEXT:    v_fma_f32 v0, v0, v4, v8
517; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
518; GFX6-NEXT:    v_cvt_f32_f16_e32 v4, v6
519; GFX6-NEXT:    v_fma_f32 v1, v1, v5, v9
520; GFX6-NEXT:    v_cvt_f32_f16_e32 v5, v10
521; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
522; GFX6-NEXT:    v_cvt_f32_f16_e32 v6, v7
523; GFX6-NEXT:    v_cvt_f32_f16_e32 v7, v11
524; GFX6-NEXT:    v_fma_f32 v2, v2, v4, v5
525; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
526; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
527; GFX6-NEXT:    v_fma_f32 v3, v3, v6, v7
528; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
529; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
530; GFX6-NEXT:    s_setpc_b64 s[30:31]
531;
532; GFX8-LABEL: v_fma_v4f16:
533; GFX8:       ; %bb.0:
534; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
536; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
537; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 16, v4
538; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
539; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
540; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 16, v5
541; GFX8-NEXT:    v_fma_f16 v0, v0, v2, v4
542; GFX8-NEXT:    v_fma_f16 v2, v6, v8, v10
543; GFX8-NEXT:    v_fma_f16 v1, v1, v3, v5
544; GFX8-NEXT:    v_fma_f16 v3, v7, v9, v11
545; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
546; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
547; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
548; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
549; GFX8-NEXT:    s_setpc_b64 s[30:31]
550;
551; GFX9-LABEL: v_fma_v4f16:
552; GFX9:       ; %bb.0:
553; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554; GFX9-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
555; GFX9-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
556; GFX9-NEXT:    s_setpc_b64 s[30:31]
557;
558; GFX10-LABEL: v_fma_v4f16:
559; GFX10:       ; %bb.0:
560; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561; GFX10-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
562; GFX10-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
563; GFX10-NEXT:    s_setpc_b64 s[30:31]
564;
565; GFX11-LABEL: v_fma_v4f16:
566; GFX11:       ; %bb.0:
567; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
568; GFX11-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
569; GFX11-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
570; GFX11-NEXT:    s_setpc_b64 s[30:31]
571  %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z)
572  ret <4 x half> %fma
573}
574
; Scalar f64 llvm.fma with no operand modifiers.
; Every check prefix expects a single v_fma_f64 on 64-bit register pairs:
; v[0:1] * v[2:3] + v[4:5], result in v[0:1].
575define double @v_fma_f64(double %x, double %y, double %z) {
576; GFX6-LABEL: v_fma_f64:
577; GFX6:       ; %bb.0:
578; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579; GFX6-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
580; GFX6-NEXT:    s_setpc_b64 s[30:31]
581;
582; GFX8-LABEL: v_fma_f64:
583; GFX8:       ; %bb.0:
584; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585; GFX8-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
586; GFX8-NEXT:    s_setpc_b64 s[30:31]
587;
588; GFX9-LABEL: v_fma_f64:
589; GFX9:       ; %bb.0:
590; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591; GFX9-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
592; GFX9-NEXT:    s_setpc_b64 s[30:31]
593;
594; GFX10-LABEL: v_fma_f64:
595; GFX10:       ; %bb.0:
596; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
598; GFX10-NEXT:    s_setpc_b64 s[30:31]
599;
600; GFX11-LABEL: v_fma_f64:
601; GFX11:       ; %bb.0:
602; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
604; GFX11-NEXT:    s_setpc_b64 s[30:31]
605  %fma = call double @llvm.fma.f64(double %x, double %y, double %z)
606  ret double %fma
607}
608
; f64 llvm.fma with all three operands negated in the IR.
; Since (-x) * (-y) + (-z) == x * y - z, every check prefix expects the two
; multiplicands to appear without modifiers and only the addend to carry a
; negate source modifier (-v[4:5]). No separate negate instruction is
; expected.
609define double @v_fma_f64_fneg_all(double %x, double %y, double %z) {
610; GFX6-LABEL: v_fma_f64_fneg_all:
611; GFX6:       ; %bb.0:
612; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613; GFX6-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
614; GFX6-NEXT:    s_setpc_b64 s[30:31]
615;
616; GFX8-LABEL: v_fma_f64_fneg_all:
617; GFX8:       ; %bb.0:
618; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619; GFX8-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
620; GFX8-NEXT:    s_setpc_b64 s[30:31]
621;
622; GFX9-LABEL: v_fma_f64_fneg_all:
623; GFX9:       ; %bb.0:
624; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
625; GFX9-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
626; GFX9-NEXT:    s_setpc_b64 s[30:31]
627;
628; GFX10-LABEL: v_fma_f64_fneg_all:
629; GFX10:       ; %bb.0:
630; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
631; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
632; GFX10-NEXT:    s_setpc_b64 s[30:31]
633;
634; GFX11-LABEL: v_fma_f64_fneg_all:
635; GFX11:       ; %bb.0:
636; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
637; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
638; GFX11-NEXT:    s_setpc_b64 s[30:31]
639  %neg.x = fneg double %x
640  %neg.y = fneg double %y
641  %neg.z = fneg double %z
642  %fma = call double @llvm.fma.f64(double %neg.x, double %neg.y, double %neg.z)
643  ret double %fma
644}
645
; <2 x double> llvm.fma. Every check prefix expects two independent
; v_fma_f64 instructions, one per element: element 0 uses v[0:1], v[4:5],
; v[8:9] and element 1 uses v[2:3], v[6:7], v[10:11].
646define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
647; GFX6-LABEL: v_fma_v2f64:
648; GFX6:       ; %bb.0:
649; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650; GFX6-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
651; GFX6-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
652; GFX6-NEXT:    s_setpc_b64 s[30:31]
653;
654; GFX8-LABEL: v_fma_v2f64:
655; GFX8:       ; %bb.0:
656; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657; GFX8-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
658; GFX8-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
659; GFX8-NEXT:    s_setpc_b64 s[30:31]
660;
661; GFX9-LABEL: v_fma_v2f64:
662; GFX9:       ; %bb.0:
663; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
664; GFX9-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
665; GFX9-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
666; GFX9-NEXT:    s_setpc_b64 s[30:31]
667;
668; GFX10-LABEL: v_fma_v2f64:
669; GFX10:       ; %bb.0:
670; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
671; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
672; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
673; GFX10-NEXT:    s_setpc_b64 s[30:31]
674;
675; GFX11-LABEL: v_fma_v2f64:
676; GFX11:       ; %bb.0:
677; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
679; GFX11-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
680; GFX11-NEXT:    s_setpc_b64 s[30:31]
681  %fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z)
682  ret <2 x double> %fma
683}
684
; f32 llvm.fma whose first multiplicand is llvm.fabs(%x).
; Every check prefix expects the absolute value to appear as the |v0| source
; modifier on v_fma_f32, with no separate instruction for the fabs.
685define float @v_fma_f32_fabs_lhs(float %x, float %y, float %z) {
686; GFX6-LABEL: v_fma_f32_fabs_lhs:
687; GFX6:       ; %bb.0:
688; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
689; GFX6-NEXT:    v_fma_f32 v0, |v0|, v1, v2
690; GFX6-NEXT:    s_setpc_b64 s[30:31]
691;
692; GFX8-LABEL: v_fma_f32_fabs_lhs:
693; GFX8:       ; %bb.0:
694; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
695; GFX8-NEXT:    v_fma_f32 v0, |v0|, v1, v2
696; GFX8-NEXT:    s_setpc_b64 s[30:31]
697;
698; GFX9-LABEL: v_fma_f32_fabs_lhs:
699; GFX9:       ; %bb.0:
700; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
701; GFX9-NEXT:    v_fma_f32 v0, |v0|, v1, v2
702; GFX9-NEXT:    s_setpc_b64 s[30:31]
703;
704; GFX10-LABEL: v_fma_f32_fabs_lhs:
705; GFX10:       ; %bb.0:
706; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707; GFX10-NEXT:    v_fma_f32 v0, |v0|, v1, v2
708; GFX10-NEXT:    s_setpc_b64 s[30:31]
709;
710; GFX11-LABEL: v_fma_f32_fabs_lhs:
711; GFX11:       ; %bb.0:
712; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
713; GFX11-NEXT:    v_fma_f32 v0, |v0|, v1, v2
714; GFX11-NEXT:    s_setpc_b64 s[30:31]
715  %fabs.x = call float @llvm.fabs.f32(float %x)
716  %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
717  ret float %fma
718}
719
; f32 llvm.fma whose second multiplicand is llvm.fabs(%y).
; Every check prefix expects the absolute value to appear as the |v1| source
; modifier on v_fma_f32, with no separate instruction for the fabs.
720define float @v_fma_f32_fabs_rhs(float %x, float %y, float %z) {
721; GFX6-LABEL: v_fma_f32_fabs_rhs:
722; GFX6:       ; %bb.0:
723; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
724; GFX6-NEXT:    v_fma_f32 v0, v0, |v1|, v2
725; GFX6-NEXT:    s_setpc_b64 s[30:31]
726;
727; GFX8-LABEL: v_fma_f32_fabs_rhs:
728; GFX8:       ; %bb.0:
729; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730; GFX8-NEXT:    v_fma_f32 v0, v0, |v1|, v2
731; GFX8-NEXT:    s_setpc_b64 s[30:31]
732;
733; GFX9-LABEL: v_fma_f32_fabs_rhs:
734; GFX9:       ; %bb.0:
735; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
736; GFX9-NEXT:    v_fma_f32 v0, v0, |v1|, v2
737; GFX9-NEXT:    s_setpc_b64 s[30:31]
738;
739; GFX10-LABEL: v_fma_f32_fabs_rhs:
740; GFX10:       ; %bb.0:
741; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
742; GFX10-NEXT:    v_fma_f32 v0, v0, |v1|, v2
743; GFX10-NEXT:    s_setpc_b64 s[30:31]
744;
745; GFX11-LABEL: v_fma_f32_fabs_rhs:
746; GFX11:       ; %bb.0:
747; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
748; GFX11-NEXT:    v_fma_f32 v0, v0, |v1|, v2
749; GFX11-NEXT:    s_setpc_b64 s[30:31]
750  %fabs.y = call float @llvm.fabs.f32(float %y)
751  %fma = call float @llvm.fma.f32(float %x, float %fabs.y, float %z)
752  ret float %fma
753}
754
; f32 llvm.fma where both multiplicands are wrapped in llvm.fabs.
; Every check prefix expects both absolute values to appear as source
; modifiers (|v0| and |v1|) on a single v_fma_f32.
755define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, float %z) {
756; GFX6-LABEL: v_fma_f32_fabs_lhs_rhs:
757; GFX6:       ; %bb.0:
758; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
759; GFX6-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
760; GFX6-NEXT:    s_setpc_b64 s[30:31]
761;
762; GFX8-LABEL: v_fma_f32_fabs_lhs_rhs:
763; GFX8:       ; %bb.0:
764; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
765; GFX8-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
766; GFX8-NEXT:    s_setpc_b64 s[30:31]
767;
768; GFX9-LABEL: v_fma_f32_fabs_lhs_rhs:
769; GFX9:       ; %bb.0:
770; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
771; GFX9-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
772; GFX9-NEXT:    s_setpc_b64 s[30:31]
773;
774; GFX10-LABEL: v_fma_f32_fabs_lhs_rhs:
775; GFX10:       ; %bb.0:
776; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; GFX10-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
778; GFX10-NEXT:    s_setpc_b64 s[30:31]
779;
780; GFX11-LABEL: v_fma_f32_fabs_lhs_rhs:
781; GFX11:       ; %bb.0:
782; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783; GFX11-NEXT:    v_fma_f32 v0, |v0|, |v1|, v2
784; GFX11-NEXT:    s_setpc_b64 s[30:31]
785  %fabs.x = call float @llvm.fabs.f32(float %x)
786  %fabs.y = call float @llvm.fabs.f32(float %y)
787  %fma = call float @llvm.fma.f32(float %fabs.x, float %fabs.y, float %z)
788  ret float %fma
789}
790
; amdgpu_ps variant of the f32 fma where %x is marked inreg and the other two
; operands are not. Every check prefix expects s0 to be used directly as the
; first source of v_fma_f32 (no copy into a vector register), with %y and %z
; in v0 and v1. Unlike the default-calling-convention tests in this file, the
; expected output has no s_waitcnt / s_setpc_b64 and ends with the
; "return to shader part epilog" marker.
791define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float inreg %x, float %y, float %z) {
792; GFX6-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
793; GFX6:       ; %bb.0:
794; GFX6-NEXT:    v_fma_f32 v0, s0, v0, v1
795; GFX6-NEXT:    ; return to shader part epilog
796;
797; GFX8-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
798; GFX8:       ; %bb.0:
799; GFX8-NEXT:    v_fma_f32 v0, s0, v0, v1
800; GFX8-NEXT:    ; return to shader part epilog
801;
802; GFX9-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
803; GFX9:       ; %bb.0:
804; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1
805; GFX9-NEXT:    ; return to shader part epilog
806;
807; GFX10-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
808; GFX10:       ; %bb.0:
809; GFX10-NEXT:    v_fma_f32 v0, s0, v0, v1
810; GFX10-NEXT:    ; return to shader part epilog
811;
812; GFX11-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
813; GFX11:       ; %bb.0:
814; GFX11-NEXT:    v_fma_f32 v0, s0, v0, v1
815; GFX11-NEXT:    ; return to shader part epilog
816  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
817  ret float %fma
818}
819
; Scalar f32 fma in an amdgpu_ps function where only %y (the second
; multiplicand) is marked inreg.
; The tahiti, fiji and gfx900 checks keep s0 in the second source slot, matching
; the IR operand order. The gfx1010 and gfx1100 checks instead expect s0 in the
; first slot with v0 second, i.e. the two multiplicands appear swapped.
820define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float %x, float inreg %y, float %z) {
821; GFX6-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
822; GFX6:       ; %bb.0:
823; GFX6-NEXT:    v_fma_f32 v0, v0, s0, v1
824; GFX6-NEXT:    ; return to shader part epilog
825;
826; GFX8-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
827; GFX8:       ; %bb.0:
828; GFX8-NEXT:    v_fma_f32 v0, v0, s0, v1
829; GFX8-NEXT:    ; return to shader part epilog
830;
831; GFX9-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
832; GFX9:       ; %bb.0:
833; GFX9-NEXT:    v_fma_f32 v0, v0, s0, v1
834; GFX9-NEXT:    ; return to shader part epilog
835;
836; GFX10-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
837; GFX10:       ; %bb.0:
838; GFX10-NEXT:    v_fma_f32 v0, s0, v0, v1
839; GFX10-NEXT:    ; return to shader part epilog
840;
841; GFX11-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
842; GFX11:       ; %bb.0:
843; GFX11-NEXT:    v_fma_f32 v0, s0, v0, v1
844; GFX11-NEXT:    ; return to shader part epilog
845  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
846  ret float %fma
847}
848
; Scalar f32 fma in an amdgpu_ps function where all three operands are marked
; inreg (they show up as s0, s1, s2 in the expected output).
; The tahiti, fiji and gfx900 checks expect s1 and s2 to be copied into v0 and
; v1 with v_mov_b32 first, so the v_fma_f32 reads only one scalar register (s0).
; The gfx1010 and gfx1100 checks expect a single v_mov_b32 of s2, with the
; v_fma_f32 reading s1 and s0 directly.
; NOTE(review): the difference presumably reflects how many scalar registers a
; single VALU instruction may read on each generation - confirm against the
; ISA documentation.
849define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float inreg %x, float inreg %y, float inreg %z) {
850; GFX6-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
851; GFX6:       ; %bb.0:
852; GFX6-NEXT:    v_mov_b32_e32 v0, s1
853; GFX6-NEXT:    v_mov_b32_e32 v1, s2
854; GFX6-NEXT:    v_fma_f32 v0, s0, v0, v1
855; GFX6-NEXT:    ; return to shader part epilog
856;
857; GFX8-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
858; GFX8:       ; %bb.0:
859; GFX8-NEXT:    v_mov_b32_e32 v0, s1
860; GFX8-NEXT:    v_mov_b32_e32 v1, s2
861; GFX8-NEXT:    v_fma_f32 v0, s0, v0, v1
862; GFX8-NEXT:    ; return to shader part epilog
863;
864; GFX9-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
865; GFX9:       ; %bb.0:
866; GFX9-NEXT:    v_mov_b32_e32 v0, s1
867; GFX9-NEXT:    v_mov_b32_e32 v1, s2
868; GFX9-NEXT:    v_fma_f32 v0, s0, v0, v1
869; GFX9-NEXT:    ; return to shader part epilog
870;
871; GFX10-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
872; GFX10:       ; %bb.0:
873; GFX10-NEXT:    v_mov_b32_e32 v0, s2
874; GFX10-NEXT:    v_fma_f32 v0, s1, s0, v0
875; GFX10-NEXT:    ; return to shader part epilog
876;
877; GFX11-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
878; GFX11:       ; %bb.0:
879; GFX11-NEXT:    v_mov_b32_e32 v0, s2
880; GFX11-NEXT:    v_fma_f32 v0, s1, s0, v0
881; GFX11-NEXT:    ; return to shader part epilog
882  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
883  ret float %fma
884}
885
; Scalar f32 fma whose first multiplicand is the fneg of %x.
; On every tested target the expected code is a single v_fma_f32 with the
; negation written as a "-v0" source modifier on the first source; no separate
; negate instruction is expected.
886define float @v_fma_f32_fneg_lhs(float %x, float %y, float %z) {
887; GFX6-LABEL: v_fma_f32_fneg_lhs:
888; GFX6:       ; %bb.0:
889; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890; GFX6-NEXT:    v_fma_f32 v0, -v0, v1, v2
891; GFX6-NEXT:    s_setpc_b64 s[30:31]
892;
893; GFX8-LABEL: v_fma_f32_fneg_lhs:
894; GFX8:       ; %bb.0:
895; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
896; GFX8-NEXT:    v_fma_f32 v0, -v0, v1, v2
897; GFX8-NEXT:    s_setpc_b64 s[30:31]
898;
899; GFX9-LABEL: v_fma_f32_fneg_lhs:
900; GFX9:       ; %bb.0:
901; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
902; GFX9-NEXT:    v_fma_f32 v0, -v0, v1, v2
903; GFX9-NEXT:    s_setpc_b64 s[30:31]
904;
905; GFX10-LABEL: v_fma_f32_fneg_lhs:
906; GFX10:       ; %bb.0:
907; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
908; GFX10-NEXT:    v_fma_f32 v0, -v0, v1, v2
909; GFX10-NEXT:    s_setpc_b64 s[30:31]
910;
911; GFX11-LABEL: v_fma_f32_fneg_lhs:
912; GFX11:       ; %bb.0:
913; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914; GFX11-NEXT:    v_fma_f32 v0, -v0, v1, v2
915; GFX11-NEXT:    s_setpc_b64 s[30:31]
916  %neg.x = fneg float %x
917  %fma = call float @llvm.fma.f32(float %neg.x, float %y, float %z)
918  ret float %fma
919}
920
; Scalar f32 fma whose second multiplicand is the fneg of %y.
; On every tested target the expected code is a single v_fma_f32 with the
; negation written as a "-v1" source modifier on the second source; no
; separate negate instruction is expected.
921define float @v_fma_f32_fneg_rhs(float %x, float %y, float %z) {
922; GFX6-LABEL: v_fma_f32_fneg_rhs:
923; GFX6:       ; %bb.0:
924; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
925; GFX6-NEXT:    v_fma_f32 v0, v0, -v1, v2
926; GFX6-NEXT:    s_setpc_b64 s[30:31]
927;
928; GFX8-LABEL: v_fma_f32_fneg_rhs:
929; GFX8:       ; %bb.0:
930; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
931; GFX8-NEXT:    v_fma_f32 v0, v0, -v1, v2
932; GFX8-NEXT:    s_setpc_b64 s[30:31]
933;
934; GFX9-LABEL: v_fma_f32_fneg_rhs:
935; GFX9:       ; %bb.0:
936; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937; GFX9-NEXT:    v_fma_f32 v0, v0, -v1, v2
938; GFX9-NEXT:    s_setpc_b64 s[30:31]
939;
940; GFX10-LABEL: v_fma_f32_fneg_rhs:
941; GFX10:       ; %bb.0:
942; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
943; GFX10-NEXT:    v_fma_f32 v0, v0, -v1, v2
944; GFX10-NEXT:    s_setpc_b64 s[30:31]
945;
946; GFX11-LABEL: v_fma_f32_fneg_rhs:
947; GFX11:       ; %bb.0:
948; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949; GFX11-NEXT:    v_fma_f32 v0, v0, -v1, v2
950; GFX11-NEXT:    s_setpc_b64 s[30:31]
951  %neg.y = fneg float %y
952  %fma = call float @llvm.fma.f32(float %x, float %neg.y, float %z)
953  ret float %fma
954}
955
; Scalar f32 fma whose addend is the fneg of %z.
; On every tested target the expected code is a single v_fma_f32 with the
; negation written as a "-v2" source modifier on the third source; no separate
; negate instruction is expected.
956define float @v_fma_f32_fneg_z(float %x, float %y, float %z) {
957; GFX6-LABEL: v_fma_f32_fneg_z:
958; GFX6:       ; %bb.0:
959; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960; GFX6-NEXT:    v_fma_f32 v0, v0, v1, -v2
961; GFX6-NEXT:    s_setpc_b64 s[30:31]
962;
963; GFX8-LABEL: v_fma_f32_fneg_z:
964; GFX8:       ; %bb.0:
965; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966; GFX8-NEXT:    v_fma_f32 v0, v0, v1, -v2
967; GFX8-NEXT:    s_setpc_b64 s[30:31]
968;
969; GFX9-LABEL: v_fma_f32_fneg_z:
970; GFX9:       ; %bb.0:
971; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; GFX9-NEXT:    v_fma_f32 v0, v0, v1, -v2
973; GFX9-NEXT:    s_setpc_b64 s[30:31]
974;
975; GFX10-LABEL: v_fma_f32_fneg_z:
976; GFX10:       ; %bb.0:
977; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
978; GFX10-NEXT:    v_fma_f32 v0, v0, v1, -v2
979; GFX10-NEXT:    s_setpc_b64 s[30:31]
980;
981; GFX11-LABEL: v_fma_f32_fneg_z:
982; GFX11:       ; %bb.0:
983; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
984; GFX11-NEXT:    v_fma_f32 v0, v0, v1, -v2
985; GFX11-NEXT:    s_setpc_b64 s[30:31]
986  %neg.z = fneg float %z
987  %fma = call float @llvm.fma.f32(float %x, float %y, float %neg.z)
988  ret float %fma
989}
990
; amdgpu_ps function computing fma(fabs(%x), %y, %z) with %x marked inreg.
; The fabs call carries the "contract" fast-math flag while the fma call
; carries no flags. On every tested target the expected code is a single
; v_fma_f32 with the absolute value written as a "|s0|" source modifier.
; NOTE(review): going by the function name, this is a regression test for a
; compiler crash that followed a failed attempt to select an fma_mix
; instruction - confirm against the commit that introduced it.
991define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) {
992; GFX6-LABEL: dont_crash_after_fma_mix_select_attempt:
993; GFX6:       ; %bb.0: ; %.entry
994; GFX6-NEXT:    v_fma_f32 v0, |s0|, v0, v1
995; GFX6-NEXT:    ; return to shader part epilog
996;
997; GFX8-LABEL: dont_crash_after_fma_mix_select_attempt:
998; GFX8:       ; %bb.0: ; %.entry
999; GFX8-NEXT:    v_fma_f32 v0, |s0|, v0, v1
1000; GFX8-NEXT:    ; return to shader part epilog
1001;
1002; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt:
1003; GFX9:       ; %bb.0: ; %.entry
1004; GFX9-NEXT:    v_fma_f32 v0, |s0|, v0, v1
1005; GFX9-NEXT:    ; return to shader part epilog
1006;
1007; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt:
1008; GFX10:       ; %bb.0: ; %.entry
1009; GFX10-NEXT:    v_fma_f32 v0, |s0|, v0, v1
1010; GFX10-NEXT:    ; return to shader part epilog
1011;
1012; GFX11-LABEL: dont_crash_after_fma_mix_select_attempt:
1013; GFX11:       ; %bb.0: ; %.entry
1014; GFX11-NEXT:    v_fma_f32 v0, |s0|, v0, v1
1015; GFX11-NEXT:    ; return to shader part epilog
1016.entry:
1017  %fabs.x = call contract float @llvm.fabs.f32(float %x)
1018  %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
1019  ret float %fma
1020}
1021
; Intrinsic declarations used by the test functions in this file.
; NOTE(review): the f16, f64 and vector variants are presumably referenced by
; functions earlier in the file - verify before pruning any of them.

; Scalar fused multiply-add: half, float and double.
1022declare half @llvm.fma.f16(half, half, half) #0
1023declare float @llvm.fma.f32(float, float, float) #0
1024declare double @llvm.fma.f64(double, double, double) #0
1025
; Scalar absolute value: half and float.
1026declare half @llvm.fabs.f16(half) #0
1027declare float @llvm.fabs.f32(float) #0
1028
; Two-element vector fused multiply-add.
1029declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
1030declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #0
1031declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
1032
; Three- and four-element half vector fused multiply-add.
1033declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #0
1034declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
1035
; Attribute group shared by every intrinsic declaration above.
1036attributes #0 = { nounwind readnone speculatable willreturn }
1037