xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-neg-mul.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
7; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
8
9; fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; Scalar float case. The function returns (-(x * y)) - z, written as an fmul
; followed by an fneg and an fsub.
; Expected code, as pinned by the assertions inside the function
;   - default gfx900 and gfx1010 configurations expect a v_mul_f32 whose second
;     source is negated, followed by a separate v_sub_f32 (nothing is fused).
;   - with -fp-contract=fast both targets expect a single v_fma_f32 whose
;     second multiplicand and addend are negated.
;   - with --denormal-fp-math=preserve-sign both targets expect a single
;     v_mad_f32 with the same negated operands.
10define float @test_f32_sub_ext_neg_mul(float %x, float %y, float %z) {
11; GFX9-LABEL: test_f32_sub_ext_neg_mul:
12; GFX9:       ; %bb.0: ; %entry
13; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14; GFX9-NEXT:    v_mul_f32_e64 v0, v0, -v1
15; GFX9-NEXT:    v_sub_f32_e32 v0, v0, v2
16; GFX9-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX9-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
19; GFX9-CONTRACT:       ; %bb.0: ; %entry
20; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, -v1, -v2
22; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
23;
24; GFX9-DENORM-LABEL: test_f32_sub_ext_neg_mul:
25; GFX9-DENORM:       ; %bb.0: ; %entry
26; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
27; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, -v1, -v2
28; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
29;
30; GFX10-LABEL: test_f32_sub_ext_neg_mul:
31; GFX10:       ; %bb.0: ; %entry
32; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33; GFX10-NEXT:    v_mul_f32_e64 v0, v0, -v1
34; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
35; GFX10-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX10-CONTRACT-LABEL: test_f32_sub_ext_neg_mul:
38; GFX10-CONTRACT:       ; %bb.0: ; %entry
39; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, -v1, -v2
41; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
42;
43; GFX10-DENORM-LABEL: test_f32_sub_ext_neg_mul:
44; GFX10-DENORM:       ; %bb.0: ; %entry
45; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
46; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, -v1, -v2
47; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
48entry:
49  %a = fmul float %x, %y
50  %b = fneg float %a
51  %c = fsub float %b, %z
52  ret float %c
53}
54
; Scalar half case. The function returns (-(x * y)) - z, written as an fmul
; followed by an fneg and an fsub.
; Expected code, as pinned by the assertions inside the function
;   - default gfx900 and gfx1010 configurations expect a v_mul_f16 whose second
;     source is negated, followed by a separate v_sub_f16.
;   - with -fp-contract=fast both targets expect a single v_fma_f16 whose
;     second multiplicand and addend are negated.
;   - with --denormal-fp-math=preserve-sign the two targets differ. gfx900
;     expects a single v_mad_legacy_f16, while gfx1010 expects the same
;     separate multiply and subtract as its default configuration.
55define half @test_f16_sub_ext_neg_mul(half %x, half %y, half %z) {
56; GFX9-LABEL: test_f16_sub_ext_neg_mul:
57; GFX9:       ; %bb.0: ; %entry
58; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX9-NEXT:    v_mul_f16_e64 v0, v0, -v1
60; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v2
61; GFX9-NEXT:    s_setpc_b64 s[30:31]
62;
63; GFX9-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
64; GFX9-CONTRACT:       ; %bb.0: ; %entry
65; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
66; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, -v1, -v2
67; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
68;
69; GFX9-DENORM-LABEL: test_f16_sub_ext_neg_mul:
70; GFX9-DENORM:       ; %bb.0: ; %entry
71; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v0, v0, -v1, -v2
73; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX10-LABEL: test_f16_sub_ext_neg_mul:
76; GFX10:       ; %bb.0: ; %entry
77; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX10-NEXT:    v_mul_f16_e64 v0, v0, -v1
79; GFX10-NEXT:    v_sub_f16_e32 v0, v0, v2
80; GFX10-NEXT:    s_setpc_b64 s[30:31]
81;
82; GFX10-CONTRACT-LABEL: test_f16_sub_ext_neg_mul:
83; GFX10-CONTRACT:       ; %bb.0: ; %entry
84; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, -v1, -v2
86; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
87;
88; GFX10-DENORM-LABEL: test_f16_sub_ext_neg_mul:
89; GFX10-DENORM:       ; %bb.0: ; %entry
90; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX10-DENORM-NEXT:    v_mul_f16_e64 v0, v0, -v1
92; GFX10-DENORM-NEXT:    v_sub_f16_e32 v0, v0, v2
93; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
94entry:
95  %a = fmul half %x, %y
96  %b = fneg half %a
97  %c = fsub half %b, %z
98  ret half %c
99}
100
; Scalar double case. The function returns (-(x * y)) - z, written as an fmul
; followed by an fneg and an fsub.
; Expected code, as pinned by the assertions inside the function
;   - default and --denormal-fp-math=preserve-sign configurations on both
;     targets expect an unmodified v_mul_f64 followed by a v_add_f64 in which
;     both the product and z are negated.
;   - with -fp-contract=fast both targets expect a single v_fma_f64 whose
;     first multiplicand and addend are negated.
101define double @test_f64_sub_ext_neg_mul(double %x, double %y, double %z) {
102; GFX9-LABEL: test_f64_sub_ext_neg_mul:
103; GFX9:       ; %bb.0: ; %entry
104; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
106; GFX9-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[4:5]
107; GFX9-NEXT:    s_setpc_b64 s[30:31]
108;
109; GFX9-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
110; GFX9-CONTRACT:       ; %bb.0: ; %entry
111; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
112; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
113; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
114;
115; GFX9-DENORM-LABEL: test_f64_sub_ext_neg_mul:
116; GFX9-DENORM:       ; %bb.0: ; %entry
117; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
119; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[4:5]
120; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
121;
122; GFX10-LABEL: test_f64_sub_ext_neg_mul:
123; GFX10:       ; %bb.0: ; %entry
124; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
126; GFX10-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[4:5]
127; GFX10-NEXT:    s_setpc_b64 s[30:31]
128;
129; GFX10-CONTRACT-LABEL: test_f64_sub_ext_neg_mul:
130; GFX10-CONTRACT:       ; %bb.0: ; %entry
131; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], -v[4:5]
133; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
134;
135; GFX10-DENORM-LABEL: test_f64_sub_ext_neg_mul:
136; GFX10-DENORM:       ; %bb.0: ; %entry
137; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
138; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
139; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[4:5]
140; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
141entry:
142  %a = fmul double %x, %y
143  %b = fneg double %a
144  %c = fsub double %b, %z
145  ret double %c
146}
147
148; fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; Four-element float vector case. The function returns (-(x * y)) - z
; element-wise, written as an fmul followed by an fneg and an fsub.
; The assertions inside the function show one instruction per element
;   - default gfx900 and gfx1010 configurations expect four v_mul_f32 with the
;     second source negated, followed by four separate v_sub_f32.
;   - with -fp-contract=fast both targets expect four v_fma_f32 whose second
;     multiplicand and addend are negated.
;   - with --denormal-fp-math=preserve-sign both targets expect four v_mad_f32
;     with the same negated operands.
149define <4 x float> @test_v4f32_sub_ext_neg_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
150; GFX9-LABEL: test_v4f32_sub_ext_neg_mul:
151; GFX9:       ; %bb.0: ; %entry
152; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX9-NEXT:    v_mul_f32_e64 v0, v0, -v4
154; GFX9-NEXT:    v_mul_f32_e64 v1, v1, -v5
155; GFX9-NEXT:    v_mul_f32_e64 v2, v2, -v6
156; GFX9-NEXT:    v_mul_f32_e64 v3, v3, -v7
157; GFX9-NEXT:    v_sub_f32_e32 v0, v0, v8
158; GFX9-NEXT:    v_sub_f32_e32 v1, v1, v9
159; GFX9-NEXT:    v_sub_f32_e32 v2, v2, v10
160; GFX9-NEXT:    v_sub_f32_e32 v3, v3, v11
161; GFX9-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX9-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
164; GFX9-CONTRACT:       ; %bb.0: ; %entry
165; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, -v4, -v8
167; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, v1, -v5, -v9
168; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, -v6, -v10
169; GFX9-CONTRACT-NEXT:    v_fma_f32 v3, v3, -v7, -v11
170; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
171;
172; GFX9-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
173; GFX9-DENORM:       ; %bb.0: ; %entry
174; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
175; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, -v4, -v8
176; GFX9-DENORM-NEXT:    v_mad_f32 v1, v1, -v5, -v9
177; GFX9-DENORM-NEXT:    v_mad_f32 v2, v2, -v6, -v10
178; GFX9-DENORM-NEXT:    v_mad_f32 v3, v3, -v7, -v11
179; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
180;
181; GFX10-LABEL: test_v4f32_sub_ext_neg_mul:
182; GFX10:       ; %bb.0: ; %entry
183; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GFX10-NEXT:    v_mul_f32_e64 v0, v0, -v4
185; GFX10-NEXT:    v_mul_f32_e64 v1, v1, -v5
186; GFX10-NEXT:    v_mul_f32_e64 v2, v2, -v6
187; GFX10-NEXT:    v_mul_f32_e64 v3, v3, -v7
188; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v8
189; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v9
190; GFX10-NEXT:    v_sub_f32_e32 v2, v2, v10
191; GFX10-NEXT:    v_sub_f32_e32 v3, v3, v11
192; GFX10-NEXT:    s_setpc_b64 s[30:31]
193;
194; GFX10-CONTRACT-LABEL: test_v4f32_sub_ext_neg_mul:
195; GFX10-CONTRACT:       ; %bb.0: ; %entry
196; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, -v4, -v8
198; GFX10-CONTRACT-NEXT:    v_fma_f32 v1, v1, -v5, -v9
199; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, v2, -v6, -v10
200; GFX10-CONTRACT-NEXT:    v_fma_f32 v3, v3, -v7, -v11
201; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX10-DENORM-LABEL: test_v4f32_sub_ext_neg_mul:
204; GFX10-DENORM:       ; %bb.0: ; %entry
205; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, -v4, -v8
207; GFX10-DENORM-NEXT:    v_mad_f32 v1, v1, -v5, -v9
208; GFX10-DENORM-NEXT:    v_mad_f32 v2, v2, -v6, -v10
209; GFX10-DENORM-NEXT:    v_mad_f32 v3, v3, -v7, -v11
210; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
211entry:
212  %a = fmul <4 x float> %x, %y
213  %b = fneg <4 x float> %a
214  %c = fsub <4 x float> %b, %z
215  ret <4 x float> %c
216}
217
; Four-element half vector case. The function returns (-(x * y)) - z
; element-wise, written as an fmul followed by an fneg and an fsub.
; Expected code, as pinned by the assertions inside the function
;   - default and --denormal-fp-math=preserve-sign configurations on both
;     targets expect two v_pk_mul_f16 with neg_lo/neg_hi set on the second
;     source, then four per-element subtracts (v_sub_f16_e32 for one half and
;     v_sub_f16_sdwa selecting WORD_1 for the other), and finally two
;     v_pack_b32_f16 to rebuild the result registers.
;   - with -fp-contract=fast both targets expect just two v_pk_fma_f16 with
;     neg_lo/neg_hi set on the second multiplicand and the addend.
218define <4 x half> @test_v4f16_sub_ext_neg_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
219; GFX9-LABEL: test_v4f16_sub_ext_neg_mul:
220; GFX9:       ; %bb.0: ; %entry
221; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
223; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
224; GFX9-NEXT:    v_sub_f16_e32 v2, v0, v4
225; GFX9-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
226; GFX9-NEXT:    v_sub_f16_e32 v3, v1, v5
227; GFX9-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
228; GFX9-NEXT:    v_pack_b32_f16 v0, v2, v0
229; GFX9-NEXT:    v_pack_b32_f16 v1, v3, v1
230; GFX9-NEXT:    s_setpc_b64 s[30:31]
231;
232; GFX9-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
233; GFX9-CONTRACT:       ; %bb.0: ; %entry
234; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
235; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
236; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
237; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
238;
239; GFX9-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
240; GFX9-DENORM:       ; %bb.0: ; %entry
241; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
242; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
243; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
244; GFX9-DENORM-NEXT:    v_sub_f16_e32 v2, v0, v4
245; GFX9-DENORM-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
246; GFX9-DENORM-NEXT:    v_sub_f16_e32 v3, v1, v5
247; GFX9-DENORM-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
248; GFX9-DENORM-NEXT:    v_pack_b32_f16 v0, v2, v0
249; GFX9-DENORM-NEXT:    v_pack_b32_f16 v1, v3, v1
250; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
251;
252; GFX10-LABEL: test_v4f16_sub_ext_neg_mul:
253; GFX10:       ; %bb.0: ; %entry
254; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
256; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
257; GFX10-NEXT:    v_sub_f16_e32 v2, v0, v4
258; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
259; GFX10-NEXT:    v_sub_f16_e32 v3, v1, v5
260; GFX10-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
261; GFX10-NEXT:    v_pack_b32_f16 v0, v2, v0
262; GFX10-NEXT:    v_pack_b32_f16 v1, v3, v1
263; GFX10-NEXT:    s_setpc_b64 s[30:31]
264;
265; GFX10-CONTRACT-LABEL: test_v4f16_sub_ext_neg_mul:
266; GFX10-CONTRACT:       ; %bb.0: ; %entry
267; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
268; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,1,1] neg_hi:[0,1,1]
269; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,1,1] neg_hi:[0,1,1]
270; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
271;
272; GFX10-DENORM-LABEL: test_v4f16_sub_ext_neg_mul:
273; GFX10-DENORM:       ; %bb.0: ; %entry
274; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
275; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2 neg_lo:[0,1] neg_hi:[0,1]
276; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3 neg_lo:[0,1] neg_hi:[0,1]
277; GFX10-DENORM-NEXT:    v_sub_f16_e32 v2, v0, v4
278; GFX10-DENORM-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
279; GFX10-DENORM-NEXT:    v_sub_f16_e32 v3, v1, v5
280; GFX10-DENORM-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
281; GFX10-DENORM-NEXT:    v_pack_b32_f16 v0, v2, v0
282; GFX10-DENORM-NEXT:    v_pack_b32_f16 v1, v3, v1
283; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
284entry:
285  %a = fmul <4 x half> %x, %y
286  %b = fneg <4 x half> %a
287  %c = fsub <4 x half> %b, %z
288  ret <4 x half> %c
289}
290
; Four-element double vector case. The function returns (-(x * y)) - z
; element-wise, written as an fmul followed by an fneg and an fsub.
; The assertions inside the function show one instruction per element
;   - default and --denormal-fp-math=preserve-sign configurations on both
;     targets expect four unmodified v_mul_f64 followed by four v_add_f64 in
;     which both the product and the z element are negated.
;   - with -fp-contract=fast both targets expect four v_fma_f64 whose first
;     multiplicand and addend are negated.
291define <4 x double> @test_v4f64_sub_ext_neg_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
292; GFX9-LABEL: test_v4f64_sub_ext_neg_mul:
293; GFX9:       ; %bb.0: ; %entry
294; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
295; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
296; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
297; GFX9-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
298; GFX9-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
299; GFX9-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[16:17]
300; GFX9-NEXT:    v_add_f64 v[2:3], -v[2:3], -v[18:19]
301; GFX9-NEXT:    v_add_f64 v[4:5], -v[4:5], -v[20:21]
302; GFX9-NEXT:    v_add_f64 v[6:7], -v[6:7], -v[22:23]
303; GFX9-NEXT:    s_setpc_b64 s[30:31]
304;
305; GFX9-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
306; GFX9-CONTRACT:       ; %bb.0: ; %entry
307; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
309; GFX9-CONTRACT-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
310; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
311; GFX9-CONTRACT-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[14:15], -v[22:23]
312; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
313;
314; GFX9-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
315; GFX9-DENORM:       ; %bb.0: ; %entry
316; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
317; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
318; GFX9-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
319; GFX9-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
320; GFX9-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
321; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[16:17]
322; GFX9-DENORM-NEXT:    v_add_f64 v[2:3], -v[2:3], -v[18:19]
323; GFX9-DENORM-NEXT:    v_add_f64 v[4:5], -v[4:5], -v[20:21]
324; GFX9-DENORM-NEXT:    v_add_f64 v[6:7], -v[6:7], -v[22:23]
325; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
326;
327; GFX10-LABEL: test_v4f64_sub_ext_neg_mul:
328; GFX10:       ; %bb.0: ; %entry
329; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
331; GFX10-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
332; GFX10-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
333; GFX10-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
334; GFX10-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[16:17]
335; GFX10-NEXT:    v_add_f64 v[2:3], -v[2:3], -v[18:19]
336; GFX10-NEXT:    v_add_f64 v[4:5], -v[4:5], -v[20:21]
337; GFX10-NEXT:    v_add_f64 v[6:7], -v[6:7], -v[22:23]
338; GFX10-NEXT:    s_setpc_b64 s[30:31]
339;
340; GFX10-CONTRACT-LABEL: test_v4f64_sub_ext_neg_mul:
341; GFX10-CONTRACT:       ; %bb.0: ; %entry
342; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
343; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], -v[16:17]
344; GFX10-CONTRACT-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], -v[18:19]
345; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[12:13], -v[20:21]
346; GFX10-CONTRACT-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[14:15], -v[22:23]
347; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
348;
349; GFX10-DENORM-LABEL: test_v4f64_sub_ext_neg_mul:
350; GFX10-DENORM:       ; %bb.0: ; %entry
351; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
353; GFX10-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
354; GFX10-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
355; GFX10-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
356; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], -v[0:1], -v[16:17]
357; GFX10-DENORM-NEXT:    v_add_f64 v[2:3], -v[2:3], -v[18:19]
358; GFX10-DENORM-NEXT:    v_add_f64 v[4:5], -v[4:5], -v[20:21]
359; GFX10-DENORM-NEXT:    v_add_f64 v[6:7], -v[6:7], -v[22:23]
360; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
361entry:
362  %a = fmul <4 x double> %x, %y
363  %b = fneg <4 x double> %a
364  %c = fsub <4 x double> %b, %z
365  ret <4 x double> %c
366}
367