xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-sub-mul.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
7; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
8; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -fp-contract=fast < %s | FileCheck -check-prefix=GFX11-CONTRACT %s
9; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX11-DENORM %s
10
11; fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
12; fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
13
; Scalar f32, product on the left of the subtraction: returns (x * y) - z.
; Expected selection per llc configuration, as recorded in the checks below:
;   - plain gfx900 / gfx1010: separate v_mul_f32 followed by v_sub_f32.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): a single v_fma_f32 with
;     the addend negated.
;   - --denormal-fp-math=preserve-sign: v_mad_f32 with the addend negated on
;     gfx900 / gfx1010, but still v_mul_f32 + v_sub_f32 on gfx1100.
14define float @test_f32_sub_mul(float %x, float %y, float %z) {
15; GFX9-LABEL: test_f32_sub_mul:
16; GFX9:       ; %bb.0: ; %.entry
17; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
19; GFX9-NEXT:    v_sub_f32_e32 v0, v0, v2
20; GFX9-NEXT:    s_setpc_b64 s[30:31]
21;
22; GFX9-CONTRACT-LABEL: test_f32_sub_mul:
23; GFX9-CONTRACT:       ; %bb.0: ; %.entry
24; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, -v2
26; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
27;
28; GFX9-DENORM-LABEL: test_f32_sub_mul:
29; GFX9-DENORM:       ; %bb.0: ; %.entry
30; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
31; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, v1, -v2
32; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX10-LABEL: test_f32_sub_mul:
35; GFX10:       ; %bb.0: ; %.entry
36; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
38; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v2
39; GFX10-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX10-CONTRACT-LABEL: test_f32_sub_mul:
42; GFX10-CONTRACT:       ; %bb.0: ; %.entry
43; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, -v2
45; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
46;
47; GFX10-DENORM-LABEL: test_f32_sub_mul:
48; GFX10-DENORM:       ; %bb.0: ; %.entry
49; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
50; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, v1, -v2
51; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
52;
53; GFX11-CONTRACT-LABEL: test_f32_sub_mul:
54; GFX11-CONTRACT:       ; %bb.0: ; %.entry
55; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56; GFX11-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, -v2
57; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
58;
59; GFX11-DENORM-LABEL: test_f32_sub_mul:
60; GFX11-DENORM:       ; %bb.0: ; %.entry
61; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
62; GFX11-DENORM-NEXT:    v_mul_f32_e32 v0, v0, v1
63; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
64; GFX11-DENORM-NEXT:    v_sub_f32_e32 v0, v0, v2
65; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
66.entry:
  ; The product %a is the left operand (minuend) of the fsub.
67  %a = fmul float %x, %y
68  %b = fsub float %a, %z
69  ret float %b
70}
71
; Scalar f32, product on the right of the subtraction: returns z - (x * y).
; Expected selection per llc configuration, as recorded in the checks below:
;   - plain gfx900 / gfx1010: v_mul_f32 followed by v_sub_f32 with z as the
;     first source.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): a single v_fma_f32 with
;     the first multiplicand negated.
;   - --denormal-fp-math=preserve-sign: v_mad_f32 with the first multiplicand
;     negated on gfx900 / gfx1010, but still v_mul_f32 + v_sub_f32 on gfx1100.
72define float @test_f32_sub_mul_rhs(float %x, float %y, float %z) {
73; GFX9-LABEL: test_f32_sub_mul_rhs:
74; GFX9:       ; %bb.0: ; %.entry
75; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
77; GFX9-NEXT:    v_sub_f32_e32 v0, v2, v0
78; GFX9-NEXT:    s_setpc_b64 s[30:31]
79;
80; GFX9-CONTRACT-LABEL: test_f32_sub_mul_rhs:
81; GFX9-CONTRACT:       ; %bb.0: ; %.entry
82; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, -v0, v1, v2
84; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
85;
86; GFX9-DENORM-LABEL: test_f32_sub_mul_rhs:
87; GFX9-DENORM:       ; %bb.0: ; %.entry
88; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89; GFX9-DENORM-NEXT:    v_mad_f32 v0, -v0, v1, v2
90; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
91;
92; GFX10-LABEL: test_f32_sub_mul_rhs:
93; GFX10:       ; %bb.0: ; %.entry
94; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
95; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
96; GFX10-NEXT:    v_sub_f32_e32 v0, v2, v0
97; GFX10-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX10-CONTRACT-LABEL: test_f32_sub_mul_rhs:
100; GFX10-CONTRACT:       ; %bb.0: ; %.entry
101; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, -v0, v1, v2
103; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
104;
105; GFX10-DENORM-LABEL: test_f32_sub_mul_rhs:
106; GFX10-DENORM:       ; %bb.0: ; %.entry
107; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
108; GFX10-DENORM-NEXT:    v_mad_f32 v0, -v0, v1, v2
109; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
110;
111; GFX11-CONTRACT-LABEL: test_f32_sub_mul_rhs:
112; GFX11-CONTRACT:       ; %bb.0: ; %.entry
113; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114; GFX11-CONTRACT-NEXT:    v_fma_f32 v0, -v0, v1, v2
115; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
116;
117; GFX11-DENORM-LABEL: test_f32_sub_mul_rhs:
118; GFX11-DENORM:       ; %bb.0: ; %.entry
119; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
120; GFX11-DENORM-NEXT:    v_mul_f32_e32 v0, v0, v1
121; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
122; GFX11-DENORM-NEXT:    v_sub_f32_e32 v0, v2, v0
123; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
124.entry:
  ; The product %a is the right operand (subtrahend) of the fsub.
125  %a = fmul float %x, %y
126  %b = fsub float %z, %a
127  ret float %b
128}
129
; Scalar f16, product on the left of the subtraction: returns (x * y) - z.
; Expected selection per llc configuration, as recorded in the checks below:
;   - plain gfx900 / gfx1010: v_mul_f16 followed by v_sub_f16.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): a single v_fma_f16 with
;     the addend negated.
;   - --denormal-fp-math=preserve-sign: v_mad_legacy_f16 on gfx900 only;
;     gfx1010 and gfx1100 keep v_mul_f16 + v_sub_f16.
130define half @test_half_sub_mul(half %x, half %y, half %z) {
131; GFX9-LABEL: test_half_sub_mul:
132; GFX9:       ; %bb.0: ; %.entry
133; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v1
135; GFX9-NEXT:    v_sub_f16_e32 v0, v0, v2
136; GFX9-NEXT:    s_setpc_b64 s[30:31]
137;
138; GFX9-CONTRACT-LABEL: test_half_sub_mul:
139; GFX9-CONTRACT:       ; %bb.0: ; %.entry
140; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, -v2
142; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
143;
144; GFX9-DENORM-LABEL: test_half_sub_mul:
145; GFX9-DENORM:       ; %bb.0: ; %.entry
146; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
147; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v0, v0, v1, -v2
148; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX10-LABEL: test_half_sub_mul:
151; GFX10:       ; %bb.0: ; %.entry
152; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
154; GFX10-NEXT:    v_sub_f16_e32 v0, v0, v2
155; GFX10-NEXT:    s_setpc_b64 s[30:31]
156;
157; GFX10-CONTRACT-LABEL: test_half_sub_mul:
158; GFX10-CONTRACT:       ; %bb.0: ; %.entry
159; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
160; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, -v2
161; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
162;
163; GFX10-DENORM-LABEL: test_half_sub_mul:
164; GFX10-DENORM:       ; %bb.0: ; %.entry
165; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
166; GFX10-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
167; GFX10-DENORM-NEXT:    v_sub_f16_e32 v0, v0, v2
168; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
169;
170; GFX11-CONTRACT-LABEL: test_half_sub_mul:
171; GFX11-CONTRACT:       ; %bb.0: ; %.entry
172; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
173; GFX11-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, -v2
174; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
175;
176; GFX11-DENORM-LABEL: test_half_sub_mul:
177; GFX11-DENORM:       ; %bb.0: ; %.entry
178; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179; GFX11-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
180; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
181; GFX11-DENORM-NEXT:    v_sub_f16_e32 v0, v0, v2
182; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
183.entry:
  ; The product %a is the left operand (minuend) of the fsub.
184  %a = fmul half %x, %y
185  %b = fsub half %a, %z
186  ret half %b
187}
188
; Scalar f16, product on the right of the subtraction: returns z - (x * y).
; Expected selection per llc configuration, as recorded in the checks below:
;   - plain gfx900 / gfx1010: v_mul_f16 followed by v_sub_f16 with z as the
;     first source.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): a single v_fma_f16 with
;     the first multiplicand negated.
;   - --denormal-fp-math=preserve-sign: v_mad_legacy_f16 on gfx900 only;
;     gfx1010 and gfx1100 keep v_mul_f16 + v_sub_f16.
189define half @test_half_sub_mul_rhs(half %x, half %y, half %z) {
190; GFX9-LABEL: test_half_sub_mul_rhs:
191; GFX9:       ; %bb.0: ; %.entry
192; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v1
194; GFX9-NEXT:    v_sub_f16_e32 v0, v2, v0
195; GFX9-NEXT:    s_setpc_b64 s[30:31]
196;
197; GFX9-CONTRACT-LABEL: test_half_sub_mul_rhs:
198; GFX9-CONTRACT:       ; %bb.0: ; %.entry
199; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, -v0, v1, v2
201; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
202;
203; GFX9-DENORM-LABEL: test_half_sub_mul_rhs:
204; GFX9-DENORM:       ; %bb.0: ; %.entry
205; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v0, -v0, v1, v2
207; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
208;
209; GFX10-LABEL: test_half_sub_mul_rhs:
210; GFX10:       ; %bb.0: ; %.entry
211; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
212; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
213; GFX10-NEXT:    v_sub_f16_e32 v0, v2, v0
214; GFX10-NEXT:    s_setpc_b64 s[30:31]
215;
216; GFX10-CONTRACT-LABEL: test_half_sub_mul_rhs:
217; GFX10-CONTRACT:       ; %bb.0: ; %.entry
218; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, -v0, v1, v2
220; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX10-DENORM-LABEL: test_half_sub_mul_rhs:
223; GFX10-DENORM:       ; %bb.0: ; %.entry
224; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX10-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
226; GFX10-DENORM-NEXT:    v_sub_f16_e32 v0, v2, v0
227; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
228;
229; GFX11-CONTRACT-LABEL: test_half_sub_mul_rhs:
230; GFX11-CONTRACT:       ; %bb.0: ; %.entry
231; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232; GFX11-CONTRACT-NEXT:    v_fma_f16 v0, -v0, v1, v2
233; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
234;
235; GFX11-DENORM-LABEL: test_half_sub_mul_rhs:
236; GFX11-DENORM:       ; %bb.0: ; %.entry
237; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
238; GFX11-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
239; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
240; GFX11-DENORM-NEXT:    v_sub_f16_e32 v0, v2, v0
241; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
242.entry:
  ; The product %a is the right operand (subtrahend) of the fsub.
243  %a = fmul half %x, %y
244  %b = fsub half %z, %a
245  ret half %b
246}
247
; Scalar f64, product on the left of the subtraction: returns (x * y) - z.
; Expected selection per llc configuration, as recorded in the checks below:
;   - plain and --denormal-fp-math=preserve-sign configurations: v_mul_f64
;     followed by v_add_f64 with the second source negated (the subtraction is
;     emitted as an add with a negate modifier).
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): a single v_fma_f64 with
;     the addend negated.
248define double @test_double_sub_mul(double %x, double %y, double %z) {
249; GFX9-LABEL: test_double_sub_mul:
250; GFX9:       ; %bb.0: ; %.entry
251; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
253; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
254; GFX9-NEXT:    s_setpc_b64 s[30:31]
255;
256; GFX9-CONTRACT-LABEL: test_double_sub_mul:
257; GFX9-CONTRACT:       ; %bb.0: ; %.entry
258; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
259; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
260; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
261;
262; GFX9-DENORM-LABEL: test_double_sub_mul:
263; GFX9-DENORM:       ; %bb.0: ; %.entry
264; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
266; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
267; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
268;
269; GFX10-LABEL: test_double_sub_mul:
270; GFX10:       ; %bb.0: ; %.entry
271; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
272; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
273; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
274; GFX10-NEXT:    s_setpc_b64 s[30:31]
275;
276; GFX10-CONTRACT-LABEL: test_double_sub_mul:
277; GFX10-CONTRACT:       ; %bb.0: ; %.entry
278; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
279; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
280; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
281;
282; GFX10-DENORM-LABEL: test_double_sub_mul:
283; GFX10-DENORM:       ; %bb.0: ; %.entry
284; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
285; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
286; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
287; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
288;
289; GFX11-CONTRACT-LABEL: test_double_sub_mul:
290; GFX11-CONTRACT:       ; %bb.0: ; %.entry
291; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292; GFX11-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
293; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
294;
295; GFX11-DENORM-LABEL: test_double_sub_mul:
296; GFX11-DENORM:       ; %bb.0: ; %.entry
297; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
298; GFX11-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
299; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
300; GFX11-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], -v[4:5]
301; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
302.entry:
  ; The product %a is the left operand (minuend) of the fsub.
303  %a = fmul double %x, %y
304  %b = fsub double %a, %z
305  ret double %b
306}
307
; Scalar f64, product on the right of the subtraction: returns z - (x * y).
; Expected selection per llc configuration, as recorded in the checks below:
;   - plain and --denormal-fp-math=preserve-sign configurations: v_mul_f64
;     followed by v_add_f64 of z and the negated product.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): a single v_fma_f64 with
;     the first multiplicand negated.
308define double @test_double_sub_mul_rhs(double %x, double %y, double %z) {
309; GFX9-LABEL: test_double_sub_mul_rhs:
310; GFX9:       ; %bb.0: ; %.entry
311; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
312; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
313; GFX9-NEXT:    v_add_f64 v[0:1], v[4:5], -v[0:1]
314; GFX9-NEXT:    s_setpc_b64 s[30:31]
315;
316; GFX9-CONTRACT-LABEL: test_double_sub_mul_rhs:
317; GFX9-CONTRACT:       ; %bb.0: ; %.entry
318; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
319; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
320; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
321;
322; GFX9-DENORM-LABEL: test_double_sub_mul_rhs:
323; GFX9-DENORM:       ; %bb.0: ; %.entry
324; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
325; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
326; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[4:5], -v[0:1]
327; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
328;
329; GFX10-LABEL: test_double_sub_mul_rhs:
330; GFX10:       ; %bb.0: ; %.entry
331; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
333; GFX10-NEXT:    v_add_f64 v[0:1], v[4:5], -v[0:1]
334; GFX10-NEXT:    s_setpc_b64 s[30:31]
335;
336; GFX10-CONTRACT-LABEL: test_double_sub_mul_rhs:
337; GFX10-CONTRACT:       ; %bb.0: ; %.entry
338; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
340; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
341;
342; GFX10-DENORM-LABEL: test_double_sub_mul_rhs:
343; GFX10-DENORM:       ; %bb.0: ; %.entry
344; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
346; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[4:5], -v[0:1]
347; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
348;
349; GFX11-CONTRACT-LABEL: test_double_sub_mul_rhs:
350; GFX11-CONTRACT:       ; %bb.0: ; %.entry
351; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; GFX11-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], v[4:5]
353; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
354;
355; GFX11-DENORM-LABEL: test_double_sub_mul_rhs:
356; GFX11-DENORM:       ; %bb.0: ; %.entry
357; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
358; GFX11-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
359; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_1)
360; GFX11-DENORM-NEXT:    v_add_f64 v[0:1], v[4:5], -v[0:1]
361; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
362.entry:
  ; The product %a is the right operand (subtrahend) of the fsub.
363  %a = fmul double %x, %y
364  %b = fsub double %z, %a
365  ret double %b
366}
367
; <4 x float>, product on the left of the subtraction: returns (x * y) - z
; element-wise. The checks below show the vector handled one 32-bit lane per
; instruction (x in v0-v3, y in v4-v7, z in v8-v11):
;   - plain gfx900 / gfx1010: four v_mul_f32 followed by four v_sub_f32.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): four v_fma_f32 with the
;     addend negated.
;   - --denormal-fp-math=preserve-sign: four v_mad_f32 on gfx900 / gfx1010;
;     gfx1100 keeps mul + sub, paired up as v_dual_mul_f32 / v_dual_sub_f32.
368define <4 x float> @test_v4f32_sub_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
369; GFX9-LABEL: test_v4f32_sub_mul:
370; GFX9:       ; %bb.0: ; %.entry
371; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
373; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v5
374; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v6
375; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v7
376; GFX9-NEXT:    v_sub_f32_e32 v0, v0, v8
377; GFX9-NEXT:    v_sub_f32_e32 v1, v1, v9
378; GFX9-NEXT:    v_sub_f32_e32 v2, v2, v10
379; GFX9-NEXT:    v_sub_f32_e32 v3, v3, v11
380; GFX9-NEXT:    s_setpc_b64 s[30:31]
381;
382; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul:
383; GFX9-CONTRACT:       ; %bb.0: ; %.entry
384; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, -v8
386; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, -v9
387; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, -v10
388; GFX9-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, -v11
389; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
390;
391; GFX9-DENORM-LABEL: test_v4f32_sub_mul:
392; GFX9-DENORM:       ; %bb.0: ; %.entry
393; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, v4, -v8
395; GFX9-DENORM-NEXT:    v_mad_f32 v1, v1, v5, -v9
396; GFX9-DENORM-NEXT:    v_mad_f32 v2, v2, v6, -v10
397; GFX9-DENORM-NEXT:    v_mad_f32 v3, v3, v7, -v11
398; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
399;
400; GFX10-LABEL: test_v4f32_sub_mul:
401; GFX10:       ; %bb.0: ; %.entry
402; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
403; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v4
404; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v5
405; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v6
406; GFX10-NEXT:    v_mul_f32_e32 v3, v3, v7
407; GFX10-NEXT:    v_sub_f32_e32 v0, v0, v8
408; GFX10-NEXT:    v_sub_f32_e32 v1, v1, v9
409; GFX10-NEXT:    v_sub_f32_e32 v2, v2, v10
410; GFX10-NEXT:    v_sub_f32_e32 v3, v3, v11
411; GFX10-NEXT:    s_setpc_b64 s[30:31]
412;
413; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul:
414; GFX10-CONTRACT:       ; %bb.0: ; %.entry
415; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, -v8
417; GFX10-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, -v9
418; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, -v10
419; GFX10-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, -v11
420; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
421;
422; GFX10-DENORM-LABEL: test_v4f32_sub_mul:
423; GFX10-DENORM:       ; %bb.0: ; %.entry
424; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
425; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, v4, -v8
426; GFX10-DENORM-NEXT:    v_mad_f32 v1, v1, v5, -v9
427; GFX10-DENORM-NEXT:    v_mad_f32 v2, v2, v6, -v10
428; GFX10-DENORM-NEXT:    v_mad_f32 v3, v3, v7, -v11
429; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul:
432; GFX11-CONTRACT:       ; %bb.0: ; %.entry
433; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX11-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, -v8
435; GFX11-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, -v9
436; GFX11-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, -v10
437; GFX11-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, -v11
438; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
439;
440; GFX11-DENORM-LABEL: test_v4f32_sub_mul:
441; GFX11-DENORM:       ; %bb.0: ; %.entry
442; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GFX11-DENORM-NEXT:    v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
444; GFX11-DENORM-NEXT:    v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
445; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
446; GFX11-DENORM-NEXT:    v_dual_sub_f32 v0, v0, v8 :: v_dual_sub_f32 v1, v1, v9
447; GFX11-DENORM-NEXT:    v_dual_sub_f32 v2, v2, v10 :: v_dual_sub_f32 v3, v3, v11
448; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
449.entry:
  ; The product %a is the left operand (minuend) of the fsub.
450  %a = fmul <4 x float> %x, %y
451  %b = fsub <4 x float> %a, %z
452  ret <4 x float> %b
453}
454
; <4 x float>, product on the right of the subtraction: returns z - (x * y)
; element-wise. The checks below show the vector handled one 32-bit lane per
; instruction (x in v0-v3, y in v4-v7, z in v8-v11):
;   - plain gfx900 / gfx1010: four v_mul_f32 followed by four v_sub_f32 with
;     the z lane as the first source.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): four v_fma_f32 with the
;     first multiplicand negated.
;   - --denormal-fp-math=preserve-sign: four v_mad_f32 on gfx900 / gfx1010;
;     gfx1100 keeps mul + sub, paired up as v_dual_mul_f32 / v_dual_sub_f32.
455define <4 x float> @test_v4f32_sub_mul_rhs(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
456; GFX9-LABEL: test_v4f32_sub_mul_rhs:
457; GFX9:       ; %bb.0: ; %.entry
458; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
460; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v5
461; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v6
462; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v7
463; GFX9-NEXT:    v_sub_f32_e32 v0, v8, v0
464; GFX9-NEXT:    v_sub_f32_e32 v1, v9, v1
465; GFX9-NEXT:    v_sub_f32_e32 v2, v10, v2
466; GFX9-NEXT:    v_sub_f32_e32 v3, v11, v3
467; GFX9-NEXT:    s_setpc_b64 s[30:31]
468;
469; GFX9-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
470; GFX9-CONTRACT:       ; %bb.0: ; %.entry
471; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
472; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, -v0, v4, v8
473; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, -v1, v5, v9
474; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, -v2, v6, v10
475; GFX9-CONTRACT-NEXT:    v_fma_f32 v3, -v3, v7, v11
476; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
477;
478; GFX9-DENORM-LABEL: test_v4f32_sub_mul_rhs:
479; GFX9-DENORM:       ; %bb.0: ; %.entry
480; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481; GFX9-DENORM-NEXT:    v_mad_f32 v0, -v0, v4, v8
482; GFX9-DENORM-NEXT:    v_mad_f32 v1, -v1, v5, v9
483; GFX9-DENORM-NEXT:    v_mad_f32 v2, -v2, v6, v10
484; GFX9-DENORM-NEXT:    v_mad_f32 v3, -v3, v7, v11
485; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
486;
487; GFX10-LABEL: test_v4f32_sub_mul_rhs:
488; GFX10:       ; %bb.0: ; %.entry
489; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
490; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v4
491; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v5
492; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v6
493; GFX10-NEXT:    v_mul_f32_e32 v3, v3, v7
494; GFX10-NEXT:    v_sub_f32_e32 v0, v8, v0
495; GFX10-NEXT:    v_sub_f32_e32 v1, v9, v1
496; GFX10-NEXT:    v_sub_f32_e32 v2, v10, v2
497; GFX10-NEXT:    v_sub_f32_e32 v3, v11, v3
498; GFX10-NEXT:    s_setpc_b64 s[30:31]
499;
500; GFX10-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
501; GFX10-CONTRACT:       ; %bb.0: ; %.entry
502; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, -v0, v4, v8
504; GFX10-CONTRACT-NEXT:    v_fma_f32 v1, -v1, v5, v9
505; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, -v2, v6, v10
506; GFX10-CONTRACT-NEXT:    v_fma_f32 v3, -v3, v7, v11
507; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
508;
509; GFX10-DENORM-LABEL: test_v4f32_sub_mul_rhs:
510; GFX10-DENORM:       ; %bb.0: ; %.entry
511; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512; GFX10-DENORM-NEXT:    v_mad_f32 v0, -v0, v4, v8
513; GFX10-DENORM-NEXT:    v_mad_f32 v1, -v1, v5, v9
514; GFX10-DENORM-NEXT:    v_mad_f32 v2, -v2, v6, v10
515; GFX10-DENORM-NEXT:    v_mad_f32 v3, -v3, v7, v11
516; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX11-CONTRACT-LABEL: test_v4f32_sub_mul_rhs:
519; GFX11-CONTRACT:       ; %bb.0: ; %.entry
520; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX11-CONTRACT-NEXT:    v_fma_f32 v0, -v0, v4, v8
522; GFX11-CONTRACT-NEXT:    v_fma_f32 v1, -v1, v5, v9
523; GFX11-CONTRACT-NEXT:    v_fma_f32 v2, -v2, v6, v10
524; GFX11-CONTRACT-NEXT:    v_fma_f32 v3, -v3, v7, v11
525; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
526;
527; GFX11-DENORM-LABEL: test_v4f32_sub_mul_rhs:
528; GFX11-DENORM:       ; %bb.0: ; %.entry
529; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
530; GFX11-DENORM-NEXT:    v_dual_mul_f32 v0, v0, v4 :: v_dual_mul_f32 v1, v1, v5
531; GFX11-DENORM-NEXT:    v_dual_mul_f32 v2, v2, v6 :: v_dual_mul_f32 v3, v3, v7
532; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
533; GFX11-DENORM-NEXT:    v_dual_sub_f32 v0, v8, v0 :: v_dual_sub_f32 v1, v9, v1
534; GFX11-DENORM-NEXT:    v_dual_sub_f32 v2, v10, v2 :: v_dual_sub_f32 v3, v11, v3
535; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
536.entry:
  ; The product %a is the right operand (subtrahend) of the fsub.
537  %a = fmul <4 x float> %x, %y
538  %b = fsub <4 x float> %z, %a
539  ret <4 x float> %b
540}
541
; <4 x half>, product on the left of the subtraction: returns (x * y) - z
; element-wise. The checks below show two halves packed per 32-bit register
; (x in v0-v1, y in v2-v3, z in v4-v5):
;   - plain and --denormal-fp-math=preserve-sign on gfx900 / gfx1010: two
;     v_pk_mul_f16, then each half is subtracted separately (v_sub_f16 for the
;     low half, an SDWA v_sub_f16 selecting WORD_1 for the high half) and the
;     results are repacked with v_pack_b32_f16.
;   - -fp-contract=fast (gfx900 / gfx1010 / gfx1100): two v_pk_fma_f16 with
;     neg_lo / neg_hi set on the third operand.
;   - --denormal-fp-math=preserve-sign on gfx1100: two v_pk_mul_f16, high
;     halves extracted with 16-bit right shifts, four v_sub_f16, then
;     v_pack_b32_f16.
541define <4 x half> @test_v4f16_sub_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
542; GFX9-LABEL: test_v4f16_sub_mul:
543; GFX9:       ; %bb.0: ; %.entry
544; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
545; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
546; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
547; GFX9-NEXT:    v_sub_f16_e32 v2, v0, v4
548; GFX9-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
549; GFX9-NEXT:    v_sub_f16_e32 v3, v1, v5
550; GFX9-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
551; GFX9-NEXT:    v_pack_b32_f16 v0, v2, v0
552; GFX9-NEXT:    v_pack_b32_f16 v1, v3, v1
553; GFX9-NEXT:    s_setpc_b64 s[30:31]
554;
555; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul:
556; GFX9-CONTRACT:       ; %bb.0: ; %.entry
557; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
558; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
559; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
560; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
561;
562; GFX9-DENORM-LABEL: test_v4f16_sub_mul:
563; GFX9-DENORM:       ; %bb.0: ; %.entry
564; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
566; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
567; GFX9-DENORM-NEXT:    v_sub_f16_e32 v2, v0, v4
568; GFX9-DENORM-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
569; GFX9-DENORM-NEXT:    v_sub_f16_e32 v3, v1, v5
570; GFX9-DENORM-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
571; GFX9-DENORM-NEXT:    v_pack_b32_f16 v0, v2, v0
572; GFX9-DENORM-NEXT:    v_pack_b32_f16 v1, v3, v1
573; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
574;
575; GFX10-LABEL: test_v4f16_sub_mul:
576; GFX10:       ; %bb.0: ; %.entry
577; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
578; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
579; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
580; GFX10-NEXT:    v_sub_f16_e32 v2, v0, v4
581; GFX10-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
582; GFX10-NEXT:    v_sub_f16_e32 v3, v1, v5
583; GFX10-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
584; GFX10-NEXT:    v_pack_b32_f16 v0, v2, v0
585; GFX10-NEXT:    v_pack_b32_f16 v1, v3, v1
586; GFX10-NEXT:    s_setpc_b64 s[30:31]
587;
588; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul:
589; GFX10-CONTRACT:       ; %bb.0: ; %.entry
590; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
591; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
592; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
593; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
594;
595; GFX10-DENORM-LABEL: test_v4f16_sub_mul:
596; GFX10-DENORM:       ; %bb.0: ; %.entry
597; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
598; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
599; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
600; GFX10-DENORM-NEXT:    v_sub_f16_e32 v2, v0, v4
601; GFX10-DENORM-NEXT:    v_sub_f16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
602; GFX10-DENORM-NEXT:    v_sub_f16_e32 v3, v1, v5
603; GFX10-DENORM-NEXT:    v_sub_f16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
604; GFX10-DENORM-NEXT:    v_pack_b32_f16 v0, v2, v0
605; GFX10-DENORM-NEXT:    v_pack_b32_f16 v1, v3, v1
606; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
607;
608; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul:
609; GFX11-CONTRACT:       ; %bb.0: ; %.entry
610; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[0,0,1] neg_hi:[0,0,1]
612; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[0,0,1] neg_hi:[0,0,1]
613; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
614;
615; GFX11-DENORM-LABEL: test_v4f16_sub_mul:
616; GFX11-DENORM:       ; %bb.0: ; %.entry
617; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618; GFX11-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
619; GFX11-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
620; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v2, 16, v4
621; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
622; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
623; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
624; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
625; GFX11-DENORM-NEXT:    v_sub_f16_e32 v0, v0, v4
626; GFX11-DENORM-NEXT:    v_sub_f16_e32 v1, v1, v5
627; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
628; GFX11-DENORM-NEXT:    v_sub_f16_e32 v2, v6, v2
629; GFX11-DENORM-NEXT:    v_sub_f16_e32 v3, v7, v3
630; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
631; GFX11-DENORM-NEXT:    v_pack_b32_f16 v0, v0, v2
632; GFX11-DENORM-NEXT:    v_pack_b32_f16 v1, v1, v3
633; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
634.entry:
  ; The product %a is the left operand (minuend) of the fsub.
635  %a = fmul <4 x half> %x, %y
636  %b = fsub <4 x half> %a, %z
637  ret <4 x half> %b
638}
640
641define <4 x half> @test_v4f16_sub_mul_rhs(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
642; GFX9-LABEL: test_v4f16_sub_mul_rhs:
643; GFX9:       ; %bb.0: ; %.entry
644; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
646; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
647; GFX9-NEXT:    v_sub_f16_e32 v2, v4, v0
648; GFX9-NEXT:    v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
649; GFX9-NEXT:    v_sub_f16_e32 v3, v5, v1
650; GFX9-NEXT:    v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
651; GFX9-NEXT:    v_pack_b32_f16 v0, v2, v0
652; GFX9-NEXT:    v_pack_b32_f16 v1, v3, v1
653; GFX9-NEXT:    s_setpc_b64 s[30:31]
654;
655; GFX9-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
656; GFX9-CONTRACT:       ; %bb.0: ; %.entry
657; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
659; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
660; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
661;
662; GFX9-DENORM-LABEL: test_v4f16_sub_mul_rhs:
663; GFX9-DENORM:       ; %bb.0: ; %.entry
664; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
665; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
666; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
667; GFX9-DENORM-NEXT:    v_sub_f16_e32 v2, v4, v0
668; GFX9-DENORM-NEXT:    v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
669; GFX9-DENORM-NEXT:    v_sub_f16_e32 v3, v5, v1
670; GFX9-DENORM-NEXT:    v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
671; GFX9-DENORM-NEXT:    v_pack_b32_f16 v0, v2, v0
672; GFX9-DENORM-NEXT:    v_pack_b32_f16 v1, v3, v1
673; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
674;
675; GFX10-LABEL: test_v4f16_sub_mul_rhs:
676; GFX10:       ; %bb.0: ; %.entry
677; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
679; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
680; GFX10-NEXT:    v_sub_f16_e32 v2, v4, v0
681; GFX10-NEXT:    v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
682; GFX10-NEXT:    v_sub_f16_e32 v3, v5, v1
683; GFX10-NEXT:    v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
684; GFX10-NEXT:    v_pack_b32_f16 v0, v2, v0
685; GFX10-NEXT:    v_pack_b32_f16 v1, v3, v1
686; GFX10-NEXT:    s_setpc_b64 s[30:31]
687;
688; GFX10-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
689; GFX10-CONTRACT:       ; %bb.0: ; %.entry
690; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
692; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
693; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
694;
695; GFX10-DENORM-LABEL: test_v4f16_sub_mul_rhs:
696; GFX10-DENORM:       ; %bb.0: ; %.entry
697; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
698; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
699; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
700; GFX10-DENORM-NEXT:    v_sub_f16_e32 v2, v4, v0
701; GFX10-DENORM-NEXT:    v_sub_f16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
702; GFX10-DENORM-NEXT:    v_sub_f16_e32 v3, v5, v1
703; GFX10-DENORM-NEXT:    v_sub_f16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
704; GFX10-DENORM-NEXT:    v_pack_b32_f16 v0, v2, v0
705; GFX10-DENORM-NEXT:    v_pack_b32_f16 v1, v3, v1
706; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
707;
708; GFX11-CONTRACT-LABEL: test_v4f16_sub_mul_rhs:
709; GFX11-CONTRACT:       ; %bb.0: ; %.entry
710; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
711; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4 neg_lo:[1,0,0] neg_hi:[1,0,0]
712; GFX11-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5 neg_lo:[1,0,0] neg_hi:[1,0,0]
713; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
714;
715; GFX11-DENORM-LABEL: test_v4f16_sub_mul_rhs:
716; GFX11-DENORM:       ; %bb.0: ; %.entry
717; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
718; GFX11-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
719; GFX11-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
720; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v2, 16, v4
721; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v3, 16, v5
722; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
723; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
724; GFX11-DENORM-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
725; GFX11-DENORM-NEXT:    v_sub_f16_e32 v0, v4, v0
726; GFX11-DENORM-NEXT:    v_sub_f16_e32 v1, v5, v1
727; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
728; GFX11-DENORM-NEXT:    v_sub_f16_e32 v2, v2, v6
729; GFX11-DENORM-NEXT:    v_sub_f16_e32 v3, v3, v7
730; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
731; GFX11-DENORM-NEXT:    v_pack_b32_f16 v0, v0, v2
732; GFX11-DENORM-NEXT:    v_pack_b32_f16 v1, v1, v3
733; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
734.entry:
735  %a = fmul <4 x half> %x, %y
736  %b = fsub <4 x half> %z, %a
737  ret <4 x half> %b
738}
739
; <4 x double> variant of the (fsub (fmul x, y), z) pattern: the product is the
; left-hand operand of the subtraction.
; Expectations below: the -fp-contract=fast runs (the *-CONTRACT check prefixes)
; expect four v_fma_f64 with the third source negated and no separate multiply;
; every other run expects four v_mul_f64 followed by four v_add_f64 whose
; second source is negated. The gfx1100 non-contract run additionally expects
; s_delay_alu lines between the instruction groups.
740define <4 x double> @test_v4f64_sub_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
741; GFX9-LABEL: test_v4f64_sub_mul:
742; GFX9:       ; %bb.0: ; %.entry
743; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
744; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
745; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
746; GFX9-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
747; GFX9-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
748; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], -v[16:17]
749; GFX9-NEXT:    v_add_f64 v[2:3], v[2:3], -v[18:19]
750; GFX9-NEXT:    v_add_f64 v[4:5], v[4:5], -v[20:21]
751; GFX9-NEXT:    v_add_f64 v[6:7], v[6:7], -v[22:23]
752; GFX9-NEXT:    s_setpc_b64 s[30:31]
753;
754; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul:
755; GFX9-CONTRACT:       ; %bb.0: ; %.entry
756; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
757; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
758; GFX9-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
759; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
760; GFX9-CONTRACT-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
761; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
762;
763; GFX9-DENORM-LABEL: test_v4f64_sub_mul:
764; GFX9-DENORM:       ; %bb.0: ; %.entry
765; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
767; GFX9-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
768; GFX9-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
769; GFX9-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
770; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], -v[16:17]
771; GFX9-DENORM-NEXT:    v_add_f64 v[2:3], v[2:3], -v[18:19]
772; GFX9-DENORM-NEXT:    v_add_f64 v[4:5], v[4:5], -v[20:21]
773; GFX9-DENORM-NEXT:    v_add_f64 v[6:7], v[6:7], -v[22:23]
774; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
775;
776; GFX10-LABEL: test_v4f64_sub_mul:
777; GFX10:       ; %bb.0: ; %.entry
778; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
779; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
780; GFX10-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
781; GFX10-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
782; GFX10-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
783; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], -v[16:17]
784; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], -v[18:19]
785; GFX10-NEXT:    v_add_f64 v[4:5], v[4:5], -v[20:21]
786; GFX10-NEXT:    v_add_f64 v[6:7], v[6:7], -v[22:23]
787; GFX10-NEXT:    s_setpc_b64 s[30:31]
788;
789; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul:
790; GFX10-CONTRACT:       ; %bb.0: ; %.entry
791; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
793; GFX10-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
794; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
795; GFX10-CONTRACT-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
796; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
797;
798; GFX10-DENORM-LABEL: test_v4f64_sub_mul:
799; GFX10-DENORM:       ; %bb.0: ; %.entry
800; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
801; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
802; GFX10-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
803; GFX10-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
804; GFX10-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
805; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], -v[16:17]
806; GFX10-DENORM-NEXT:    v_add_f64 v[2:3], v[2:3], -v[18:19]
807; GFX10-DENORM-NEXT:    v_add_f64 v[4:5], v[4:5], -v[20:21]
808; GFX10-DENORM-NEXT:    v_add_f64 v[6:7], v[6:7], -v[22:23]
809; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
810;
811; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul:
812; GFX11-CONTRACT:       ; %bb.0: ; %.entry
813; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814; GFX11-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], -v[16:17]
815; GFX11-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], -v[18:19]
816; GFX11-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], -v[20:21]
817; GFX11-CONTRACT-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], -v[22:23]
818; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
819;
820; GFX11-DENORM-LABEL: test_v4f64_sub_mul:
821; GFX11-DENORM:       ; %bb.0: ; %.entry
822; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
823; GFX11-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
824; GFX11-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
825; GFX11-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
826; GFX11-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
827; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
828; GFX11-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], -v[16:17]
829; GFX11-DENORM-NEXT:    v_add_f64 v[2:3], v[2:3], -v[18:19]
830; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
831; GFX11-DENORM-NEXT:    v_add_f64 v[4:5], v[4:5], -v[20:21]
832; GFX11-DENORM-NEXT:    v_add_f64 v[6:7], v[6:7], -v[22:23]
833; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
834.entry:
  ; %a is the product x * y; it is used only by the fsub below.
835  %a = fmul <4 x double> %x, %y
  ; The product is the minuend: %b = %a - z.
836  %b = fsub <4 x double> %a, %z
837  ret <4 x double> %b
838}
839
; <4 x double> variant of the (fsub x, (fmul y, z)) pattern: the product is the
; right-hand operand of the subtraction.
; Expectations below: the -fp-contract=fast runs (the *-CONTRACT check prefixes)
; expect four v_fma_f64 with the first multiplicand negated and %z as the
; addend; every other run expects four v_mul_f64 followed by four v_add_f64
; that add the negated product to %z. The gfx1100 non-contract run additionally
; expects s_delay_alu lines between the instruction groups.
840define <4 x double> @test_v4f64_sub_mul_rhs(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
841; GFX9-LABEL: test_v4f64_sub_mul_rhs:
842; GFX9:       ; %bb.0: ; %.entry
843; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
844; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
845; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
846; GFX9-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
847; GFX9-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
848; GFX9-NEXT:    v_add_f64 v[0:1], v[16:17], -v[0:1]
849; GFX9-NEXT:    v_add_f64 v[2:3], v[18:19], -v[2:3]
850; GFX9-NEXT:    v_add_f64 v[4:5], v[20:21], -v[4:5]
851; GFX9-NEXT:    v_add_f64 v[6:7], v[22:23], -v[6:7]
852; GFX9-NEXT:    s_setpc_b64 s[30:31]
853;
854; GFX9-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
855; GFX9-CONTRACT:       ; %bb.0: ; %.entry
856; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
857; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
858; GFX9-CONTRACT-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
859; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
860; GFX9-CONTRACT-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
861; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
862;
863; GFX9-DENORM-LABEL: test_v4f64_sub_mul_rhs:
864; GFX9-DENORM:       ; %bb.0: ; %.entry
865; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
867; GFX9-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
868; GFX9-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
869; GFX9-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
870; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[16:17], -v[0:1]
871; GFX9-DENORM-NEXT:    v_add_f64 v[2:3], v[18:19], -v[2:3]
872; GFX9-DENORM-NEXT:    v_add_f64 v[4:5], v[20:21], -v[4:5]
873; GFX9-DENORM-NEXT:    v_add_f64 v[6:7], v[22:23], -v[6:7]
874; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
875;
876; GFX10-LABEL: test_v4f64_sub_mul_rhs:
877; GFX10:       ; %bb.0: ; %.entry
878; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
879; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
880; GFX10-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
881; GFX10-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
882; GFX10-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
883; GFX10-NEXT:    v_add_f64 v[0:1], v[16:17], -v[0:1]
884; GFX10-NEXT:    v_add_f64 v[2:3], v[18:19], -v[2:3]
885; GFX10-NEXT:    v_add_f64 v[4:5], v[20:21], -v[4:5]
886; GFX10-NEXT:    v_add_f64 v[6:7], v[22:23], -v[6:7]
887; GFX10-NEXT:    s_setpc_b64 s[30:31]
888;
889; GFX10-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
890; GFX10-CONTRACT:       ; %bb.0: ; %.entry
891; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
892; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
893; GFX10-CONTRACT-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
894; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
895; GFX10-CONTRACT-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
896; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
897;
898; GFX10-DENORM-LABEL: test_v4f64_sub_mul_rhs:
899; GFX10-DENORM:       ; %bb.0: ; %.entry
900; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
902; GFX10-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
903; GFX10-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
904; GFX10-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
905; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[16:17], -v[0:1]
906; GFX10-DENORM-NEXT:    v_add_f64 v[2:3], v[18:19], -v[2:3]
907; GFX10-DENORM-NEXT:    v_add_f64 v[4:5], v[20:21], -v[4:5]
908; GFX10-DENORM-NEXT:    v_add_f64 v[6:7], v[22:23], -v[6:7]
909; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
910;
911; GFX11-CONTRACT-LABEL: test_v4f64_sub_mul_rhs:
912; GFX11-CONTRACT:       ; %bb.0: ; %.entry
913; GFX11-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
914; GFX11-CONTRACT-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], v[16:17]
915; GFX11-CONTRACT-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], v[18:19]
916; GFX11-CONTRACT-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[12:13], v[20:21]
917; GFX11-CONTRACT-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[14:15], v[22:23]
918; GFX11-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
919;
920; GFX11-DENORM-LABEL: test_v4f64_sub_mul_rhs:
921; GFX11-DENORM:       ; %bb.0: ; %.entry
922; GFX11-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
923; GFX11-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
924; GFX11-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
925; GFX11-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
926; GFX11-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
927; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
928; GFX11-DENORM-NEXT:    v_add_f64 v[0:1], v[16:17], -v[0:1]
929; GFX11-DENORM-NEXT:    v_add_f64 v[2:3], v[18:19], -v[2:3]
930; GFX11-DENORM-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
931; GFX11-DENORM-NEXT:    v_add_f64 v[4:5], v[20:21], -v[4:5]
932; GFX11-DENORM-NEXT:    v_add_f64 v[6:7], v[22:23], -v[6:7]
933; GFX11-DENORM-NEXT:    s_setpc_b64 s[30:31]
934.entry:
  ; %a is the product x * y; it is used only by the fsub below.
935  %a = fmul <4 x double> %x, %y
  ; The product is the subtrahend here: %b = z - %a.
936  %b = fsub <4 x double> %z, %a
937  ret <4 x double> %b
938}
939