xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX9-UNSAFE %s
6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
8; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
9; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX10-UNSAFE %s
10
; Scalar f32 case: fadd(fmul(%x, %y), %z), with the product as the left addend.
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> separate v_mul_f32 and v_add_f32
;   -fp-contract=fast                 -> v_fma_f32
;   --denormal-fp-math=preserve-sign  -> v_mad_f32
;   -enable-unsafe-fp-math            -> v_fma_f32
11define float @test_f32_add_mul(float %x, float %y, float %z) {
12; GFX9-LABEL: test_f32_add_mul:
13; GFX9:       ; %bb.0: ; %.entry
14; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
15; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
16; GFX9-NEXT:    v_add_f32_e32 v0, v0, v2
17; GFX9-NEXT:    s_setpc_b64 s[30:31]
18;
19; GFX9-CONTRACT-LABEL: test_f32_add_mul:
20; GFX9-CONTRACT:       ; %bb.0: ; %.entry
21; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
22; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
23; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
24;
25; GFX9-DENORM-LABEL: test_f32_add_mul:
26; GFX9-DENORM:       ; %bb.0: ; %.entry
27; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, v1, v2
29; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
30;
31; GFX9-UNSAFE-LABEL: test_f32_add_mul:
32; GFX9-UNSAFE:       ; %bb.0: ; %.entry
33; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
34; GFX9-UNSAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
35; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
36;
37; GFX10-LABEL: test_f32_add_mul:
38; GFX10:       ; %bb.0: ; %.entry
39; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
41; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
42; GFX10-NEXT:    s_setpc_b64 s[30:31]
43;
44; GFX10-CONTRACT-LABEL: test_f32_add_mul:
45; GFX10-CONTRACT:       ; %bb.0: ; %.entry
46; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
47; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
48; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX10-DENORM-LABEL: test_f32_add_mul:
51; GFX10-DENORM:       ; %bb.0: ; %.entry
52; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, v1, v2
54; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
55;
56; GFX10-UNSAFE-LABEL: test_f32_add_mul:
57; GFX10-UNSAFE:       ; %bb.0: ; %.entry
58; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59; GFX10-UNSAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
60; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
61.entry:
62  %a = fmul float %x, %y
63  %b = fadd float %a, %z
64  ret float %b
65}
66
; Scalar f32 case with the product as the right addend: fadd(%z, fmul(%x, %y)).
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> v_mul_f32, then v_add_f32 with %z (v2)
;                                        as the first source operand
;   -fp-contract=fast                 -> v_fma_f32
;   --denormal-fp-math=preserve-sign  -> v_mad_f32
;   -enable-unsafe-fp-math            -> v_fma_f32
; The fused forms take %z as the third (addend) operand.
67define float @test_f32_add_mul_rhs(float %x, float %y, float %z) {
68; GFX9-LABEL: test_f32_add_mul_rhs:
69; GFX9:       ; %bb.0: ; %.entry
70; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
72; GFX9-NEXT:    v_add_f32_e32 v0, v2, v0
73; GFX9-NEXT:    s_setpc_b64 s[30:31]
74;
75; GFX9-CONTRACT-LABEL: test_f32_add_mul_rhs:
76; GFX9-CONTRACT:       ; %bb.0: ; %.entry
77; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
79; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
80;
81; GFX9-DENORM-LABEL: test_f32_add_mul_rhs:
82; GFX9-DENORM:       ; %bb.0: ; %.entry
83; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, v1, v2
85; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs:
88; GFX9-UNSAFE:       ; %bb.0: ; %.entry
89; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX9-UNSAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
91; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
92;
93; GFX10-LABEL: test_f32_add_mul_rhs:
94; GFX10:       ; %bb.0: ; %.entry
95; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
97; GFX10-NEXT:    v_add_f32_e32 v0, v2, v0
98; GFX10-NEXT:    s_setpc_b64 s[30:31]
99;
100; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs:
101; GFX10-CONTRACT:       ; %bb.0: ; %.entry
102; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
104; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
105;
106; GFX10-DENORM-LABEL: test_f32_add_mul_rhs:
107; GFX10-DENORM:       ; %bb.0: ; %.entry
108; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, v1, v2
110; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
111;
112; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs:
113; GFX10-UNSAFE:       ; %bb.0: ; %.entry
114; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
115; GFX10-UNSAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
116; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
117.entry:
118  %a = fmul float %x, %y
119  %b = fadd float %z, %a
120  ret float %b
121}
122
; f32 fadd(fmul(%x, %y), %z) where %z is element 1 of a <2 x float> loaded
; from an addrspace(1) pointer, rather than a plain function argument.
; Every configuration is expected to load only that element with a single
; global_load_dword at offset:4.
; Expected arithmetic per RUN configuration:
;   no extra flags                    -> v_mul_f32 + v_add_f32 on both targets
;   -fp-contract=fast                 -> gfx900: v_fma_f32
;                                        gfx1010: v_fmac_f32 into the loaded
;                                        register, then v_mov_b32 to v0
;   --denormal-fp-math=preserve-sign  -> v_mac_f32 + v_mov_b32 on both targets
;   -enable-unsafe-fp-math            -> same as -fp-contract=fast
; NOTE(review): "multiple_defs" presumably refers to the vector load/unmerge
; that defines %z producing more than one value - confirm against the combiner.
123define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) {
124; GFX9-LABEL: test_add_mul_multiple_defs_z:
125; GFX9:       ; %bb.0: ; %.entry
126; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127; GFX9-NEXT:    global_load_dword v2, v[2:3], off offset:4
128; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
129; GFX9-NEXT:    s_waitcnt vmcnt(0)
130; GFX9-NEXT:    v_add_f32_e32 v0, v0, v2
131; GFX9-NEXT:    s_setpc_b64 s[30:31]
132;
133; GFX9-CONTRACT-LABEL: test_add_mul_multiple_defs_z:
134; GFX9-CONTRACT:       ; %bb.0: ; %.entry
135; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136; GFX9-CONTRACT-NEXT:    global_load_dword v2, v[2:3], off offset:4
137; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
138; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
139; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
140;
141; GFX9-DENORM-LABEL: test_add_mul_multiple_defs_z:
142; GFX9-DENORM:       ; %bb.0: ; %.entry
143; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
144; GFX9-DENORM-NEXT:    global_load_dword v2, v[2:3], off offset:4
145; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
146; GFX9-DENORM-NEXT:    v_mac_f32_e32 v2, v0, v1
147; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v2
148; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
149;
150; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z:
151; GFX9-UNSAFE:       ; %bb.0: ; %.entry
152; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
153; GFX9-UNSAFE-NEXT:    global_load_dword v2, v[2:3], off offset:4
154; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
155; GFX9-UNSAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
156; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
157;
158; GFX10-LABEL: test_add_mul_multiple_defs_z:
159; GFX10:       ; %bb.0: ; %.entry
160; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
161; GFX10-NEXT:    global_load_dword v2, v[2:3], off offset:4
162; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
163; GFX10-NEXT:    s_waitcnt vmcnt(0)
164; GFX10-NEXT:    v_add_f32_e32 v0, v0, v2
165; GFX10-NEXT:    s_setpc_b64 s[30:31]
166;
167; GFX10-CONTRACT-LABEL: test_add_mul_multiple_defs_z:
168; GFX10-CONTRACT:       ; %bb.0: ; %.entry
169; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170; GFX10-CONTRACT-NEXT:    global_load_dword v2, v[2:3], off offset:4
171; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
172; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v2, v0, v1
173; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
174; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
175;
176; GFX10-DENORM-LABEL: test_add_mul_multiple_defs_z:
177; GFX10-DENORM:       ; %bb.0: ; %.entry
178; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
179; GFX10-DENORM-NEXT:    global_load_dword v2, v[2:3], off offset:4
180; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0)
181; GFX10-DENORM-NEXT:    v_mac_f32_e32 v2, v0, v1
182; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v2
183; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
184;
185; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z:
186; GFX10-UNSAFE:       ; %bb.0: ; %.entry
187; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188; GFX10-UNSAFE-NEXT:    global_load_dword v2, v[2:3], off offset:4
189; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
190; GFX10-UNSAFE-NEXT:    v_fmac_f32_e32 v2, v0, v1
191; GFX10-UNSAFE-NEXT:    v_mov_b32_e32 v0, v2
192; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
193.entry:
194  %a = fmul float %x, %y
195  %vec = load <2 x float>, ptr addrspace(1) %vec_ptr
196  %z = extractelement <2 x float> %vec, i64 1
197  %b = fadd float %a, %z
198  ret float %b
199}
200
; f32 fadd(%z, fmul(%x, %y)) - product as the right addend - where %z is
; element 1 of a <2 x float> loaded from an addrspace(1) pointer.
; Every configuration is expected to load only that element with a single
; global_load_dword at offset:4.
; Expected arithmetic per RUN configuration:
;   no extra flags                    -> v_mul_f32, then v_add_f32 with the
;                                        loaded value as first source operand
;   -fp-contract=fast                 -> gfx900: v_fma_f32
;                                        gfx1010: v_fmac_f32 into the loaded
;                                        register, then v_mov_b32 to v0
;   --denormal-fp-math=preserve-sign  -> v_mac_f32 + v_mov_b32 on both targets
;   -enable-unsafe-fp-math            -> same as -fp-contract=fast
201define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) {
202; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z:
203; GFX9:       ; %bb.0: ; %.entry
204; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
205; GFX9-NEXT:    global_load_dword v2, v[2:3], off offset:4
206; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v1
207; GFX9-NEXT:    s_waitcnt vmcnt(0)
208; GFX9-NEXT:    v_add_f32_e32 v0, v2, v0
209; GFX9-NEXT:    s_setpc_b64 s[30:31]
210;
211; GFX9-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z:
212; GFX9-CONTRACT:       ; %bb.0: ; %.entry
213; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214; GFX9-CONTRACT-NEXT:    global_load_dword v2, v[2:3], off offset:4
215; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
216; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v1, v2
217; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
218;
219; GFX9-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z:
220; GFX9-DENORM:       ; %bb.0: ; %.entry
221; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
222; GFX9-DENORM-NEXT:    global_load_dword v2, v[2:3], off offset:4
223; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0)
224; GFX9-DENORM-NEXT:    v_mac_f32_e32 v2, v0, v1
225; GFX9-DENORM-NEXT:    v_mov_b32_e32 v0, v2
226; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
227;
228; GFX9-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z:
229; GFX9-UNSAFE:       ; %bb.0: ; %.entry
230; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231; GFX9-UNSAFE-NEXT:    global_load_dword v2, v[2:3], off offset:4
232; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
233; GFX9-UNSAFE-NEXT:    v_fma_f32 v0, v0, v1, v2
234; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
235;
236; GFX10-LABEL: test_add_mul_rhs_multiple_defs_z:
237; GFX10:       ; %bb.0: ; %.entry
238; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239; GFX10-NEXT:    global_load_dword v2, v[2:3], off offset:4
240; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v1
241; GFX10-NEXT:    s_waitcnt vmcnt(0)
242; GFX10-NEXT:    v_add_f32_e32 v0, v2, v0
243; GFX10-NEXT:    s_setpc_b64 s[30:31]
244;
245; GFX10-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z:
246; GFX10-CONTRACT:       ; %bb.0: ; %.entry
247; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
248; GFX10-CONTRACT-NEXT:    global_load_dword v2, v[2:3], off offset:4
249; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0)
250; GFX10-CONTRACT-NEXT:    v_fmac_f32_e32 v2, v0, v1
251; GFX10-CONTRACT-NEXT:    v_mov_b32_e32 v0, v2
252; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
253;
254; GFX10-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z:
255; GFX10-DENORM:       ; %bb.0: ; %.entry
256; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257; GFX10-DENORM-NEXT:    global_load_dword v2, v[2:3], off offset:4
258; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0)
259; GFX10-DENORM-NEXT:    v_mac_f32_e32 v2, v0, v1
260; GFX10-DENORM-NEXT:    v_mov_b32_e32 v0, v2
261; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
262;
263; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z:
264; GFX10-UNSAFE:       ; %bb.0: ; %.entry
265; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX10-UNSAFE-NEXT:    global_load_dword v2, v[2:3], off offset:4
267; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0)
268; GFX10-UNSAFE-NEXT:    v_fmac_f32_e32 v2, v0, v1
269; GFX10-UNSAFE-NEXT:    v_mov_b32_e32 v0, v2
270; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
271.entry:
272  %a = fmul float %x, %y
273  %vec = load <2 x float>, ptr addrspace(1) %vec_ptr
274  %z = extractelement <2 x float> %vec, i64 1
275  %b = fadd float %z, %a
276  ret float %b
277}
278
; Scalar f16 case: fadd(fmul(%x, %y), %z), with the product as the left addend.
; Expected selection per RUN configuration:
;   no extra flags                    -> v_mul_f16 + v_add_f16 on both targets
;   -fp-contract=fast                 -> v_fma_f16 on both targets
;   --denormal-fp-math=preserve-sign  -> gfx900: v_mad_legacy_f16
;                                        gfx1010: stays v_mul_f16 + v_add_f16
;   -enable-unsafe-fp-math            -> v_fma_f16 on both targets
279define half @test_half_add_mul(half %x, half %y, half %z) {
280; GFX9-LABEL: test_half_add_mul:
281; GFX9:       ; %bb.0: ; %.entry
282; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v1
284; GFX9-NEXT:    v_add_f16_e32 v0, v0, v2
285; GFX9-NEXT:    s_setpc_b64 s[30:31]
286;
287; GFX9-CONTRACT-LABEL: test_half_add_mul:
288; GFX9-CONTRACT:       ; %bb.0: ; %.entry
289; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
291; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
292;
293; GFX9-DENORM-LABEL: test_half_add_mul:
294; GFX9-DENORM:       ; %bb.0: ; %.entry
295; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v0, v0, v1, v2
297; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
298;
299; GFX9-UNSAFE-LABEL: test_half_add_mul:
300; GFX9-UNSAFE:       ; %bb.0: ; %.entry
301; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302; GFX9-UNSAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
303; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
304;
305; GFX10-LABEL: test_half_add_mul:
306; GFX10:       ; %bb.0: ; %.entry
307; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
308; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
309; GFX10-NEXT:    v_add_f16_e32 v0, v0, v2
310; GFX10-NEXT:    s_setpc_b64 s[30:31]
311;
312; GFX10-CONTRACT-LABEL: test_half_add_mul:
313; GFX10-CONTRACT:       ; %bb.0: ; %.entry
314; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
316; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
317;
318; GFX10-DENORM-LABEL: test_half_add_mul:
319; GFX10-DENORM:       ; %bb.0: ; %.entry
320; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
321; GFX10-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
322; GFX10-DENORM-NEXT:    v_add_f16_e32 v0, v0, v2
323; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
324;
325; GFX10-UNSAFE-LABEL: test_half_add_mul:
326; GFX10-UNSAFE:       ; %bb.0: ; %.entry
327; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
328; GFX10-UNSAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
329; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
330.entry:
331  %a = fmul half %x, %y
332  %b = fadd half %a, %z
333  ret half %b
334}
335
; Scalar f16 case with the product as the right addend: fadd(%z, fmul(%x, %y)).
; Expected selection per RUN configuration:
;   no extra flags                    -> v_mul_f16, then v_add_f16 with %z (v2)
;                                        as the first source operand
;   -fp-contract=fast                 -> v_fma_f16 on both targets
;   --denormal-fp-math=preserve-sign  -> gfx900: v_mad_legacy_f16
;                                        gfx1010: stays v_mul_f16 + v_add_f16
;   -enable-unsafe-fp-math            -> v_fma_f16 on both targets
336define half @test_half_add_mul_rhs(half %x, half %y, half %z) {
337; GFX9-LABEL: test_half_add_mul_rhs:
338; GFX9:       ; %bb.0: ; %.entry
339; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
340; GFX9-NEXT:    v_mul_f16_e32 v0, v0, v1
341; GFX9-NEXT:    v_add_f16_e32 v0, v2, v0
342; GFX9-NEXT:    s_setpc_b64 s[30:31]
343;
344; GFX9-CONTRACT-LABEL: test_half_add_mul_rhs:
345; GFX9-CONTRACT:       ; %bb.0: ; %.entry
346; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX9-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
348; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
349;
350; GFX9-DENORM-LABEL: test_half_add_mul_rhs:
351; GFX9-DENORM:       ; %bb.0: ; %.entry
352; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353; GFX9-DENORM-NEXT:    v_mad_legacy_f16 v0, v0, v1, v2
354; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
355;
356; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs:
357; GFX9-UNSAFE:       ; %bb.0: ; %.entry
358; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
359; GFX9-UNSAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
360; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
361;
362; GFX10-LABEL: test_half_add_mul_rhs:
363; GFX10:       ; %bb.0: ; %.entry
364; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
365; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
366; GFX10-NEXT:    v_add_f16_e32 v0, v2, v0
367; GFX10-NEXT:    s_setpc_b64 s[30:31]
368;
369; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs:
370; GFX10-CONTRACT:       ; %bb.0: ; %.entry
371; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
372; GFX10-CONTRACT-NEXT:    v_fma_f16 v0, v0, v1, v2
373; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
374;
375; GFX10-DENORM-LABEL: test_half_add_mul_rhs:
376; GFX10-DENORM:       ; %bb.0: ; %.entry
377; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
378; GFX10-DENORM-NEXT:    v_mul_f16_e32 v0, v0, v1
379; GFX10-DENORM-NEXT:    v_add_f16_e32 v0, v2, v0
380; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
381;
382; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs:
383; GFX10-UNSAFE:       ; %bb.0: ; %.entry
384; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
385; GFX10-UNSAFE-NEXT:    v_fma_f16 v0, v0, v1, v2
386; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
387.entry:
388  %a = fmul half %x, %y
389  %b = fadd half %z, %a
390  ret half %b
391}
392
; Scalar f64 case: fadd(fmul(%x, %y), %z), with the product as the left addend.
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> separate v_mul_f64 and v_add_f64
;   -fp-contract=fast                 -> v_fma_f64
;   --denormal-fp-math=preserve-sign  -> separate v_mul_f64 and v_add_f64
;                                        (no mad form is expected for f64)
;   -enable-unsafe-fp-math            -> v_fma_f64
393define double @test_double_add_mul(double %x, double %y, double %z) {
394; GFX9-LABEL: test_double_add_mul:
395; GFX9:       ; %bb.0: ; %.entry
396; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
397; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
398; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
399; GFX9-NEXT:    s_setpc_b64 s[30:31]
400;
401; GFX9-CONTRACT-LABEL: test_double_add_mul:
402; GFX9-CONTRACT:       ; %bb.0: ; %.entry
403; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
405; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
406;
407; GFX9-DENORM-LABEL: test_double_add_mul:
408; GFX9-DENORM:       ; %bb.0: ; %.entry
409; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
410; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
411; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
412; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
413;
414; GFX9-UNSAFE-LABEL: test_double_add_mul:
415; GFX9-UNSAFE:       ; %bb.0: ; %.entry
416; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417; GFX9-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
418; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
419;
420; GFX10-LABEL: test_double_add_mul:
421; GFX10:       ; %bb.0: ; %.entry
422; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
424; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
425; GFX10-NEXT:    s_setpc_b64 s[30:31]
426;
427; GFX10-CONTRACT-LABEL: test_double_add_mul:
428; GFX10-CONTRACT:       ; %bb.0: ; %.entry
429; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
431; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
432;
433; GFX10-DENORM-LABEL: test_double_add_mul:
434; GFX10-DENORM:       ; %bb.0: ; %.entry
435; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
436; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
437; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], v[4:5]
438; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
439;
440; GFX10-UNSAFE-LABEL: test_double_add_mul:
441; GFX10-UNSAFE:       ; %bb.0: ; %.entry
442; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GFX10-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
444; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
445.entry:
446  %a = fmul double %x, %y
447  %b = fadd double %a, %z
448  ret double %b
449}
450
; Scalar f64 case with the product as the right addend: fadd(%z, fmul(%x, %y)).
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> v_mul_f64, then v_add_f64 with %z
;                                        (v[4:5]) as the first source operand
;   -fp-contract=fast                 -> v_fma_f64
;   --denormal-fp-math=preserve-sign  -> same unfused v_mul_f64 + v_add_f64
;   -enable-unsafe-fp-math            -> v_fma_f64
451define double @test_double_add_mul_rhs(double %x, double %y, double %z) {
452; GFX9-LABEL: test_double_add_mul_rhs:
453; GFX9:       ; %bb.0: ; %.entry
454; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
456; GFX9-NEXT:    v_add_f64 v[0:1], v[4:5], v[0:1]
457; GFX9-NEXT:    s_setpc_b64 s[30:31]
458;
459; GFX9-CONTRACT-LABEL: test_double_add_mul_rhs:
460; GFX9-CONTRACT:       ; %bb.0: ; %.entry
461; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
462; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
463; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
464;
465; GFX9-DENORM-LABEL: test_double_add_mul_rhs:
466; GFX9-DENORM:       ; %bb.0: ; %.entry
467; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
468; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
469; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[4:5], v[0:1]
470; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
471;
472; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs:
473; GFX9-UNSAFE:       ; %bb.0: ; %.entry
474; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX9-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
476; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
477;
478; GFX10-LABEL: test_double_add_mul_rhs:
479; GFX10:       ; %bb.0: ; %.entry
480; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
482; GFX10-NEXT:    v_add_f64 v[0:1], v[4:5], v[0:1]
483; GFX10-NEXT:    s_setpc_b64 s[30:31]
484;
485; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs:
486; GFX10-CONTRACT:       ; %bb.0: ; %.entry
487; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
488; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
489; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
490;
491; GFX10-DENORM-LABEL: test_double_add_mul_rhs:
492; GFX10-DENORM:       ; %bb.0: ; %.entry
493; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
494; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[2:3]
495; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[4:5], v[0:1]
496; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
497;
498; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs:
499; GFX10-UNSAFE:       ; %bb.0: ; %.entry
500; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501; GFX10-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
502; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
503.entry:
504  %a = fmul double %x, %y
505  %b = fadd double %z, %a
506  ret double %b
507}
508
; <4 x float> case: fadd(fmul(%x, %y), %z), with the product as the left
; addend. The expected code handles each of the four lanes with its own
; 32-bit instruction (lanes in v0-v3, %y in v4-v7, %z in v8-v11).
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> four v_mul_f32, then four v_add_f32
;   -fp-contract=fast                 -> four v_fma_f32
;   --denormal-fp-math=preserve-sign  -> four v_mad_f32
;   -enable-unsafe-fp-math            -> four v_fma_f32
509define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
510; GFX9-LABEL: test_4xfloat_add_mul:
511; GFX9:       ; %bb.0: ; %.entry
512; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v4
514; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v5
515; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v6
516; GFX9-NEXT:    v_mul_f32_e32 v3, v3, v7
517; GFX9-NEXT:    v_add_f32_e32 v0, v0, v8
518; GFX9-NEXT:    v_add_f32_e32 v1, v1, v9
519; GFX9-NEXT:    v_add_f32_e32 v2, v2, v10
520; GFX9-NEXT:    v_add_f32_e32 v3, v3, v11
521; GFX9-NEXT:    s_setpc_b64 s[30:31]
522;
523; GFX9-CONTRACT-LABEL: test_4xfloat_add_mul:
524; GFX9-CONTRACT:       ; %bb.0: ; %.entry
525; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
526; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, v8
527; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, v9
528; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, v10
529; GFX9-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, v11
530; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
531;
532; GFX9-DENORM-LABEL: test_4xfloat_add_mul:
533; GFX9-DENORM:       ; %bb.0: ; %.entry
534; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, v4, v8
536; GFX9-DENORM-NEXT:    v_mad_f32 v1, v1, v5, v9
537; GFX9-DENORM-NEXT:    v_mad_f32 v2, v2, v6, v10
538; GFX9-DENORM-NEXT:    v_mad_f32 v3, v3, v7, v11
539; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
540;
541; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul:
542; GFX9-UNSAFE:       ; %bb.0: ; %.entry
543; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544; GFX9-UNSAFE-NEXT:    v_fma_f32 v0, v0, v4, v8
545; GFX9-UNSAFE-NEXT:    v_fma_f32 v1, v1, v5, v9
546; GFX9-UNSAFE-NEXT:    v_fma_f32 v2, v2, v6, v10
547; GFX9-UNSAFE-NEXT:    v_fma_f32 v3, v3, v7, v11
548; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
549;
550; GFX10-LABEL: test_4xfloat_add_mul:
551; GFX10:       ; %bb.0: ; %.entry
552; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v4
554; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v5
555; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v6
556; GFX10-NEXT:    v_mul_f32_e32 v3, v3, v7
557; GFX10-NEXT:    v_add_f32_e32 v0, v0, v8
558; GFX10-NEXT:    v_add_f32_e32 v1, v1, v9
559; GFX10-NEXT:    v_add_f32_e32 v2, v2, v10
560; GFX10-NEXT:    v_add_f32_e32 v3, v3, v11
561; GFX10-NEXT:    s_setpc_b64 s[30:31]
562;
563; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul:
564; GFX10-CONTRACT:       ; %bb.0: ; %.entry
565; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
566; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, v4, v8
567; GFX10-CONTRACT-NEXT:    v_fma_f32 v1, v1, v5, v9
568; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, v2, v6, v10
569; GFX10-CONTRACT-NEXT:    v_fma_f32 v3, v3, v7, v11
570; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
571;
572; GFX10-DENORM-LABEL: test_4xfloat_add_mul:
573; GFX10-DENORM:       ; %bb.0: ; %.entry
574; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
575; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, v4, v8
576; GFX10-DENORM-NEXT:    v_mad_f32 v1, v1, v5, v9
577; GFX10-DENORM-NEXT:    v_mad_f32 v2, v2, v6, v10
578; GFX10-DENORM-NEXT:    v_mad_f32 v3, v3, v7, v11
579; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
580;
581; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul:
582; GFX10-UNSAFE:       ; %bb.0: ; %.entry
583; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584; GFX10-UNSAFE-NEXT:    v_fma_f32 v0, v0, v4, v8
585; GFX10-UNSAFE-NEXT:    v_fma_f32 v1, v1, v5, v9
586; GFX10-UNSAFE-NEXT:    v_fma_f32 v2, v2, v6, v10
587; GFX10-UNSAFE-NEXT:    v_fma_f32 v3, v3, v7, v11
588; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
589.entry:
590  %a = fmul <4 x float> %x, %y
591  %b = fadd <4 x float> %a, %z
592  ret <4 x float> %b
593}
594
; <3 x float> case with the product as the right addend:
; fadd(%z, fmul(%x, %y)). The expected code handles each of the three lanes
; with its own 32-bit instruction (lanes in v0-v2, %y in v3-v5, %z in v6-v8).
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> three v_mul_f32, then three v_add_f32
;                                        with the %z lane as first source
;   -fp-contract=fast                 -> three v_fma_f32
;   --denormal-fp-math=preserve-sign  -> three v_mad_f32
;   -enable-unsafe-fp-math            -> three v_fma_f32
595define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 x float> %z) {
596; GFX9-LABEL: test_3xfloat_add_mul_rhs:
597; GFX9:       ; %bb.0: ; %.entry
598; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
599; GFX9-NEXT:    v_mul_f32_e32 v0, v0, v3
600; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v4
601; GFX9-NEXT:    v_mul_f32_e32 v2, v2, v5
602; GFX9-NEXT:    v_add_f32_e32 v0, v6, v0
603; GFX9-NEXT:    v_add_f32_e32 v1, v7, v1
604; GFX9-NEXT:    v_add_f32_e32 v2, v8, v2
605; GFX9-NEXT:    s_setpc_b64 s[30:31]
606;
607; GFX9-CONTRACT-LABEL: test_3xfloat_add_mul_rhs:
608; GFX9-CONTRACT:       ; %bb.0: ; %.entry
609; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
610; GFX9-CONTRACT-NEXT:    v_fma_f32 v0, v0, v3, v6
611; GFX9-CONTRACT-NEXT:    v_fma_f32 v1, v1, v4, v7
612; GFX9-CONTRACT-NEXT:    v_fma_f32 v2, v2, v5, v8
613; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
614;
615; GFX9-DENORM-LABEL: test_3xfloat_add_mul_rhs:
616; GFX9-DENORM:       ; %bb.0: ; %.entry
617; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
618; GFX9-DENORM-NEXT:    v_mad_f32 v0, v0, v3, v6
619; GFX9-DENORM-NEXT:    v_mad_f32 v1, v1, v4, v7
620; GFX9-DENORM-NEXT:    v_mad_f32 v2, v2, v5, v8
621; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
622;
623; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs:
624; GFX9-UNSAFE:       ; %bb.0: ; %.entry
625; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626; GFX9-UNSAFE-NEXT:    v_fma_f32 v0, v0, v3, v6
627; GFX9-UNSAFE-NEXT:    v_fma_f32 v1, v1, v4, v7
628; GFX9-UNSAFE-NEXT:    v_fma_f32 v2, v2, v5, v8
629; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
630;
631; GFX10-LABEL: test_3xfloat_add_mul_rhs:
632; GFX10:       ; %bb.0: ; %.entry
633; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX10-NEXT:    v_mul_f32_e32 v0, v0, v3
635; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v4
636; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v5
637; GFX10-NEXT:    v_add_f32_e32 v0, v6, v0
638; GFX10-NEXT:    v_add_f32_e32 v1, v7, v1
639; GFX10-NEXT:    v_add_f32_e32 v2, v8, v2
640; GFX10-NEXT:    s_setpc_b64 s[30:31]
641;
642; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs:
643; GFX10-CONTRACT:       ; %bb.0: ; %.entry
644; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
645; GFX10-CONTRACT-NEXT:    v_fma_f32 v0, v0, v3, v6
646; GFX10-CONTRACT-NEXT:    v_fma_f32 v1, v1, v4, v7
647; GFX10-CONTRACT-NEXT:    v_fma_f32 v2, v2, v5, v8
648; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
649;
650; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs:
651; GFX10-DENORM:       ; %bb.0: ; %.entry
652; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
653; GFX10-DENORM-NEXT:    v_mad_f32 v0, v0, v3, v6
654; GFX10-DENORM-NEXT:    v_mad_f32 v1, v1, v4, v7
655; GFX10-DENORM-NEXT:    v_mad_f32 v2, v2, v5, v8
656; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
657;
658; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs:
659; GFX10-UNSAFE:       ; %bb.0: ; %.entry
660; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
661; GFX10-UNSAFE-NEXT:    v_fma_f32 v0, v0, v3, v6
662; GFX10-UNSAFE-NEXT:    v_fma_f32 v1, v1, v4, v7
663; GFX10-UNSAFE-NEXT:    v_fma_f32 v2, v2, v5, v8
664; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
665.entry:
666  %a = fmul <3 x float> %x, %y
667  %b = fadd <3 x float> %z, %a
668  ret <3 x float> %b
669}
670
; <4 x half> case: fadd(fmul(%x, %y), %z), with the product as the left
; addend. The expected code uses packed f16 instructions, two per operation
; (one on v0 and one on v1).
; Expected selection per RUN configuration (same on gfx900 and gfx1010):
;   no extra flags                    -> two v_pk_mul_f16, then two v_pk_add_f16
;   -fp-contract=fast                 -> two v_pk_fma_f16
;   --denormal-fp-math=preserve-sign  -> stays unfused: two v_pk_mul_f16 plus
;                                        two v_pk_add_f16
;   -enable-unsafe-fp-math            -> two v_pk_fma_f16
671define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
672; GFX9-LABEL: test_4xhalf_add_mul:
673; GFX9:       ; %bb.0: ; %.entry
674; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
675; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
676; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
677; GFX9-NEXT:    v_pk_add_f16 v0, v0, v4
678; GFX9-NEXT:    v_pk_add_f16 v1, v1, v5
679; GFX9-NEXT:    s_setpc_b64 s[30:31]
680;
681; GFX9-CONTRACT-LABEL: test_4xhalf_add_mul:
682; GFX9-CONTRACT:       ; %bb.0: ; %.entry
683; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
685; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
686; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
687;
688; GFX9-DENORM-LABEL: test_4xhalf_add_mul:
689; GFX9-DENORM:       ; %bb.0: ; %.entry
690; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
692; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
693; GFX9-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
694; GFX9-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
695; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
696;
697; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul:
698; GFX9-UNSAFE:       ; %bb.0: ; %.entry
699; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700; GFX9-UNSAFE-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
701; GFX9-UNSAFE-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
702; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
703;
704; GFX10-LABEL: test_4xhalf_add_mul:
705; GFX10:       ; %bb.0: ; %.entry
706; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
707; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
708; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
709; GFX10-NEXT:    v_pk_add_f16 v0, v0, v4
710; GFX10-NEXT:    v_pk_add_f16 v1, v1, v5
711; GFX10-NEXT:    s_setpc_b64 s[30:31]
712;
713; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul:
714; GFX10-CONTRACT:       ; %bb.0: ; %.entry
715; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
717; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
718; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
719;
720; GFX10-DENORM-LABEL: test_4xhalf_add_mul:
721; GFX10-DENORM:       ; %bb.0: ; %.entry
722; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
724; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
725; GFX10-DENORM-NEXT:    v_pk_add_f16 v0, v0, v4
726; GFX10-DENORM-NEXT:    v_pk_add_f16 v1, v1, v5
727; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
728;
729; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul:
730; GFX10-UNSAFE:       ; %bb.0: ; %.entry
731; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
732; GFX10-UNSAFE-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
733; GFX10-UNSAFE-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
734; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
735.entry:
736  %a = fmul <4 x half> %x, %y
737  %b = fadd <4 x half> %a, %z
738  ret <4 x half> %b
739}
740
; <3 x half> case, commuted form: the fmul result is the second (right-hand)
; operand of the fadd. Per the expectations below, the default and
; --denormal-fp-math=preserve-sign runs keep separate v_pk_mul_f16 and
; v_pk_add_f16 instructions (two of each), while the -fp-contract=fast and
; -enable-unsafe-fp-math runs expect each mul/add pair fused into one
; v_pk_fma_f16. The same split is expected on gfx900 and gfx1010.
741define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
742; GFX9-LABEL: test_3xhalf_add_mul_rhs:
743; GFX9:       ; %bb.0: ; %.entry
744; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
745; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v2
746; GFX9-NEXT:    v_pk_mul_f16 v1, v1, v3
747; GFX9-NEXT:    v_pk_add_f16 v0, v4, v0
748; GFX9-NEXT:    v_pk_add_f16 v1, v5, v1
749; GFX9-NEXT:    s_setpc_b64 s[30:31]
750;
751; GFX9-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
752; GFX9-CONTRACT:       ; %bb.0: ; %.entry
753; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
754; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
755; GFX9-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
756; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
757;
758; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs:
759; GFX9-DENORM:       ; %bb.0: ; %.entry
760; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
761; GFX9-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
762; GFX9-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
763; GFX9-DENORM-NEXT:    v_pk_add_f16 v0, v4, v0
764; GFX9-DENORM-NEXT:    v_pk_add_f16 v1, v5, v1
765; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
766;
767; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
768; GFX9-UNSAFE:       ; %bb.0: ; %.entry
769; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
770; GFX9-UNSAFE-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
771; GFX9-UNSAFE-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
772; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
773;
774; GFX10-LABEL: test_3xhalf_add_mul_rhs:
775; GFX10:       ; %bb.0: ; %.entry
776; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777; GFX10-NEXT:    v_pk_mul_f16 v0, v0, v2
778; GFX10-NEXT:    v_pk_mul_f16 v1, v1, v3
779; GFX10-NEXT:    v_pk_add_f16 v0, v4, v0
780; GFX10-NEXT:    v_pk_add_f16 v1, v5, v1
781; GFX10-NEXT:    s_setpc_b64 s[30:31]
782;
783; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs:
784; GFX10-CONTRACT:       ; %bb.0: ; %.entry
785; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
786; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
787; GFX10-CONTRACT-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
788; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
789;
790; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs:
791; GFX10-DENORM:       ; %bb.0: ; %.entry
792; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
793; GFX10-DENORM-NEXT:    v_pk_mul_f16 v0, v0, v2
794; GFX10-DENORM-NEXT:    v_pk_mul_f16 v1, v1, v3
795; GFX10-DENORM-NEXT:    v_pk_add_f16 v0, v4, v0
796; GFX10-DENORM-NEXT:    v_pk_add_f16 v1, v5, v1
797; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
798;
799; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
800; GFX10-UNSAFE:       ; %bb.0: ; %.entry
801; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
802; GFX10-UNSAFE-NEXT:    v_pk_fma_f16 v0, v0, v2, v4
803; GFX10-UNSAFE-NEXT:    v_pk_fma_f16 v1, v1, v3, v5
804; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
805.entry:
806  %a = fmul <3 x half> %x, %y
  ; The product %a is deliberately the right-hand fadd operand (the "_rhs" variant).
807  %b = fadd <3 x half> %z, %a
808  ret <3 x half> %b
809}
810
; <4 x double> case: fmul followed by an fadd whose first operand is the
; product. Per the expectations below, the default and
; --denormal-fp-math=preserve-sign runs keep four v_mul_f64 plus four
; v_add_f64 instructions, while the -fp-contract=fast and
; -enable-unsafe-fp-math runs expect four v_fma_f64 instructions instead.
; The same split is expected on gfx900 and gfx1010.
811define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
812; GFX9-LABEL: test_4xdouble_add_mul:
813; GFX9:       ; %bb.0: ; %.entry
814; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
815; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
816; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
817; GFX9-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
818; GFX9-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
819; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[16:17]
820; GFX9-NEXT:    v_add_f64 v[2:3], v[2:3], v[18:19]
821; GFX9-NEXT:    v_add_f64 v[4:5], v[4:5], v[20:21]
822; GFX9-NEXT:    v_add_f64 v[6:7], v[6:7], v[22:23]
823; GFX9-NEXT:    s_setpc_b64 s[30:31]
824;
825; GFX9-CONTRACT-LABEL: test_4xdouble_add_mul:
826; GFX9-CONTRACT:       ; %bb.0: ; %.entry
827; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
829; GFX9-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
830; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
831; GFX9-CONTRACT-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
832; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
833;
834; GFX9-DENORM-LABEL: test_4xdouble_add_mul:
835; GFX9-DENORM:       ; %bb.0: ; %.entry
836; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
838; GFX9-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
839; GFX9-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
840; GFX9-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
841; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], v[16:17]
842; GFX9-DENORM-NEXT:    v_add_f64 v[2:3], v[2:3], v[18:19]
843; GFX9-DENORM-NEXT:    v_add_f64 v[4:5], v[4:5], v[20:21]
844; GFX9-DENORM-NEXT:    v_add_f64 v[6:7], v[6:7], v[22:23]
845; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
846;
847; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul:
848; GFX9-UNSAFE:       ; %bb.0: ; %.entry
849; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
850; GFX9-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
851; GFX9-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
852; GFX9-UNSAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
853; GFX9-UNSAFE-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
854; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
855;
856; GFX10-LABEL: test_4xdouble_add_mul:
857; GFX10:       ; %bb.0: ; %.entry
858; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
859; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
860; GFX10-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
861; GFX10-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
862; GFX10-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
863; GFX10-NEXT:    v_add_f64 v[0:1], v[0:1], v[16:17]
864; GFX10-NEXT:    v_add_f64 v[2:3], v[2:3], v[18:19]
865; GFX10-NEXT:    v_add_f64 v[4:5], v[4:5], v[20:21]
866; GFX10-NEXT:    v_add_f64 v[6:7], v[6:7], v[22:23]
867; GFX10-NEXT:    s_setpc_b64 s[30:31]
868;
869; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul:
870; GFX10-CONTRACT:       ; %bb.0: ; %.entry
871; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
872; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
873; GFX10-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
874; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
875; GFX10-CONTRACT-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
876; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
877;
878; GFX10-DENORM-LABEL: test_4xdouble_add_mul:
879; GFX10-DENORM:       ; %bb.0: ; %.entry
880; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[8:9]
882; GFX10-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[10:11]
883; GFX10-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[12:13]
884; GFX10-DENORM-NEXT:    v_mul_f64 v[6:7], v[6:7], v[14:15]
885; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[0:1], v[16:17]
886; GFX10-DENORM-NEXT:    v_add_f64 v[2:3], v[2:3], v[18:19]
887; GFX10-DENORM-NEXT:    v_add_f64 v[4:5], v[4:5], v[20:21]
888; GFX10-DENORM-NEXT:    v_add_f64 v[6:7], v[6:7], v[22:23]
889; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
890;
891; GFX10-UNSAFE-LABEL: test_4xdouble_add_mul:
892; GFX10-UNSAFE:       ; %bb.0: ; %.entry
893; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
894; GFX10-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
895; GFX10-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
896; GFX10-UNSAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
897; GFX10-UNSAFE-NEXT:    v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
898; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
899.entry:
900  %a = fmul <4 x double> %x, %y
  ; The product %a is the left-hand fadd operand here (non-commuted form).
901  %b = fadd <4 x double> %a, %z
902  ret <4 x double> %b
903}
904
; <3 x double> case, commuted form: the fmul result is the second (right-hand)
; operand of the fadd. Per the expectations below, the default and
; --denormal-fp-math=preserve-sign runs keep three v_mul_f64 plus three
; v_add_f64 instructions, while the -fp-contract=fast and
; -enable-unsafe-fp-math runs expect three v_fma_f64 instructions instead.
; The same split is expected on gfx900 and gfx1010.
905define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, <3 x double> %z) {
906; GFX9-LABEL: test_3xdouble_add_mul_rhs:
907; GFX9:       ; %bb.0: ; %.entry
908; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909; GFX9-NEXT:    v_mul_f64 v[0:1], v[0:1], v[6:7]
910; GFX9-NEXT:    v_mul_f64 v[2:3], v[2:3], v[8:9]
911; GFX9-NEXT:    v_mul_f64 v[4:5], v[4:5], v[10:11]
912; GFX9-NEXT:    v_add_f64 v[0:1], v[12:13], v[0:1]
913; GFX9-NEXT:    v_add_f64 v[2:3], v[14:15], v[2:3]
914; GFX9-NEXT:    v_add_f64 v[4:5], v[16:17], v[4:5]
915; GFX9-NEXT:    s_setpc_b64 s[30:31]
916;
917; GFX9-CONTRACT-LABEL: test_3xdouble_add_mul_rhs:
918; GFX9-CONTRACT:       ; %bb.0: ; %.entry
919; GFX9-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
920; GFX9-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
921; GFX9-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
922; GFX9-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
923; GFX9-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
924;
925; GFX9-DENORM-LABEL: test_3xdouble_add_mul_rhs:
926; GFX9-DENORM:       ; %bb.0: ; %.entry
927; GFX9-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928; GFX9-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[6:7]
929; GFX9-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[8:9]
930; GFX9-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[10:11]
931; GFX9-DENORM-NEXT:    v_add_f64 v[0:1], v[12:13], v[0:1]
932; GFX9-DENORM-NEXT:    v_add_f64 v[2:3], v[14:15], v[2:3]
933; GFX9-DENORM-NEXT:    v_add_f64 v[4:5], v[16:17], v[4:5]
934; GFX9-DENORM-NEXT:    s_setpc_b64 s[30:31]
935;
936; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs:
937; GFX9-UNSAFE:       ; %bb.0: ; %.entry
938; GFX9-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
939; GFX9-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
940; GFX9-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
941; GFX9-UNSAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
942; GFX9-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
943;
944; GFX10-LABEL: test_3xdouble_add_mul_rhs:
945; GFX10:       ; %bb.0: ; %.entry
946; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
947; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], v[6:7]
948; GFX10-NEXT:    v_mul_f64 v[2:3], v[2:3], v[8:9]
949; GFX10-NEXT:    v_mul_f64 v[4:5], v[4:5], v[10:11]
950; GFX10-NEXT:    v_add_f64 v[0:1], v[12:13], v[0:1]
951; GFX10-NEXT:    v_add_f64 v[2:3], v[14:15], v[2:3]
952; GFX10-NEXT:    v_add_f64 v[4:5], v[16:17], v[4:5]
953; GFX10-NEXT:    s_setpc_b64 s[30:31]
954;
955; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs:
956; GFX10-CONTRACT:       ; %bb.0: ; %.entry
957; GFX10-CONTRACT-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
958; GFX10-CONTRACT-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
959; GFX10-CONTRACT-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
960; GFX10-CONTRACT-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
961; GFX10-CONTRACT-NEXT:    s_setpc_b64 s[30:31]
962;
963; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs:
964; GFX10-DENORM:       ; %bb.0: ; %.entry
965; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
966; GFX10-DENORM-NEXT:    v_mul_f64 v[0:1], v[0:1], v[6:7]
967; GFX10-DENORM-NEXT:    v_mul_f64 v[2:3], v[2:3], v[8:9]
968; GFX10-DENORM-NEXT:    v_mul_f64 v[4:5], v[4:5], v[10:11]
969; GFX10-DENORM-NEXT:    v_add_f64 v[0:1], v[12:13], v[0:1]
970; GFX10-DENORM-NEXT:    v_add_f64 v[2:3], v[14:15], v[2:3]
971; GFX10-DENORM-NEXT:    v_add_f64 v[4:5], v[16:17], v[4:5]
972; GFX10-DENORM-NEXT:    s_setpc_b64 s[30:31]
973;
974; GFX10-UNSAFE-LABEL: test_3xdouble_add_mul_rhs:
975; GFX10-UNSAFE:       ; %bb.0: ; %.entry
976; GFX10-UNSAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
977; GFX10-UNSAFE-NEXT:    v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
978; GFX10-UNSAFE-NEXT:    v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
979; GFX10-UNSAFE-NEXT:    v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
980; GFX10-UNSAFE-NEXT:    s_setpc_b64 s[30:31]
981.entry:
982  %a = fmul <3 x double> %x, %y
  ; The product %a is deliberately the right-hand fadd operand (the "_rhs" variant).
983  %b = fadd <3 x double> %z, %a
984  ret <3 x double> %b
985}
986