xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll (revision f6ace2bc15bfde4cc9bd140859fa92618568a006)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
3; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
4
; Every expected-assembly sequence in this file is for AArch64 Linux.
5target triple = "aarch64-unknown-linux-gnu"
6
7;
8; FMA: fmul + fadd pairs, both flagged 'contract', on fixed-length FP vectors
9;
10
; <4 x half> multiply-add written as fmul + fadd, both flagged 'contract'.
; With +sve the expected code is one predicated FMAD over four .h lanes.
; Without +sve the expected code stores the arguments to the stack and handles
; each lane in scalar registers: the product is computed in single precision,
; rounded to half, widened again and only then added, so the multiply and the
; add remain separate operations.
11define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) {
12; CHECK-LABEL: fma_v4f16:
13; CHECK:       // %bb.0:
14; CHECK-NEXT:    ptrue p0.h, vl4
15; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
16; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
17; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
18; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
19; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
20; CHECK-NEXT:    ret
21;
22; NONEON-NOSVE-LABEL: fma_v4f16:
23; NONEON-NOSVE:       // %bb.0:
24; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #-32]!
25; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
26; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
27; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
28; NONEON-NOSVE-NEXT:    str d2, [sp, #16]
29; NONEON-NOSVE-NEXT:    ldr h2, [sp, #22]
30; NONEON-NOSVE-NEXT:    ldr h6, [sp, #12]
31; NONEON-NOSVE-NEXT:    ldr h7, [sp, #4]
32; NONEON-NOSVE-NEXT:    fcvt s0, h0
33; NONEON-NOSVE-NEXT:    fcvt s1, h1
34; NONEON-NOSVE-NEXT:    ldr h4, [sp, #10]
35; NONEON-NOSVE-NEXT:    fcvt s2, h2
36; NONEON-NOSVE-NEXT:    fcvt s6, h6
37; NONEON-NOSVE-NEXT:    fcvt s7, h7
38; NONEON-NOSVE-NEXT:    ldr h5, [sp, #2]
39; NONEON-NOSVE-NEXT:    fcvt s4, h4
40; NONEON-NOSVE-NEXT:    ldr h3, [sp]
41; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
42; NONEON-NOSVE-NEXT:    fcvt s5, h5
43; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
44; NONEON-NOSVE-NEXT:    fcvt s3, h3
45; NONEON-NOSVE-NEXT:    fcvt s1, h1
46; NONEON-NOSVE-NEXT:    fcvt h0, s0
47; NONEON-NOSVE-NEXT:    fmul s1, s3, s1
48; NONEON-NOSVE-NEXT:    fcvt s0, h0
49; NONEON-NOSVE-NEXT:    fcvt h1, s1
50; NONEON-NOSVE-NEXT:    fadd s0, s0, s2
51; NONEON-NOSVE-NEXT:    fmul s2, s7, s6
52; NONEON-NOSVE-NEXT:    fcvt s1, h1
53; NONEON-NOSVE-NEXT:    fcvt h0, s0
54; NONEON-NOSVE-NEXT:    fcvt h2, s2
55; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
56; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
57; NONEON-NOSVE-NEXT:    fcvt s2, h2
58; NONEON-NOSVE-NEXT:    fcvt s0, h0
59; NONEON-NOSVE-NEXT:    fadd s0, s2, s0
60; NONEON-NOSVE-NEXT:    fmul s2, s5, s4
61; NONEON-NOSVE-NEXT:    fcvt h0, s0
62; NONEON-NOSVE-NEXT:    fcvt h2, s2
63; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
64; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
65; NONEON-NOSVE-NEXT:    fcvt s2, h2
66; NONEON-NOSVE-NEXT:    fcvt s0, h0
67; NONEON-NOSVE-NEXT:    fadd s0, s2, s0
68; NONEON-NOSVE-NEXT:    fcvt h0, s0
69; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
70; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
71; NONEON-NOSVE-NEXT:    fcvt s0, h0
72; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
73; NONEON-NOSVE-NEXT:    fcvt h0, s0
74; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
75; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
76; NONEON-NOSVE-NEXT:    add sp, sp, #32
77; NONEON-NOSVE-NEXT:    ret
  ; op1 * op2 + op3 as two separate instructions that both allow contraction.
78  %mul = fmul contract <4 x half> %op1, %op2
79  %res = fadd contract <4 x half> %mul, %op3
80  ret <4 x half> %res
81}
82
; <8 x half> multiply-add written as fmul + fadd, both flagged 'contract'.
; With +sve the expected code is one predicated FMAD over eight .h lanes
; (ptrue vl8) on the Z views of q0-q2.
; Without +sve the expected code stores the three 128-bit arguments to a
; 64-byte stack area and processes each of the eight lanes in scalar
; registers, rounding every product to half before the add.
83define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) {
84; CHECK-LABEL: fma_v8f16:
85; CHECK:       // %bb.0:
86; CHECK-NEXT:    ptrue p0.h, vl8
87; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
88; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
89; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
90; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
91; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
92; CHECK-NEXT:    ret
93;
94; NONEON-NOSVE-LABEL: fma_v8f16:
95; NONEON-NOSVE:       // %bb.0:
96; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-64]!
97; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
98; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
99; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
100; NONEON-NOSVE-NEXT:    str q2, [sp, #32]
101; NONEON-NOSVE-NEXT:    ldr h2, [sp, #46]
102; NONEON-NOSVE-NEXT:    ldr h22, [sp, #28]
103; NONEON-NOSVE-NEXT:    ldr h23, [sp, #12]
104; NONEON-NOSVE-NEXT:    fcvt s3, h0
105; NONEON-NOSVE-NEXT:    fcvt s1, h1
106; NONEON-NOSVE-NEXT:    ldr h20, [sp, #26]
107; NONEON-NOSVE-NEXT:    fcvt s2, h2
108; NONEON-NOSVE-NEXT:    fcvt s22, h22
109; NONEON-NOSVE-NEXT:    fcvt s23, h23
110; NONEON-NOSVE-NEXT:    ldr h21, [sp, #10]
111; NONEON-NOSVE-NEXT:    fcvt s20, h20
112; NONEON-NOSVE-NEXT:    ldr h18, [sp, #24]
113; NONEON-NOSVE-NEXT:    ldr h19, [sp, #8]
114; NONEON-NOSVE-NEXT:    ldr h16, [sp, #22]
115; NONEON-NOSVE-NEXT:    ldr h17, [sp, #6]
116; NONEON-NOSVE-NEXT:    fmul s5, s1, s3
117; NONEON-NOSVE-NEXT:    fcvt s21, h21
118; NONEON-NOSVE-NEXT:    fcvt s18, h18
119; NONEON-NOSVE-NEXT:    fcvt s19, h19
120; NONEON-NOSVE-NEXT:    fcvt s16, h16
121; NONEON-NOSVE-NEXT:    fcvt s17, h17
122; NONEON-NOSVE-NEXT:    ldr h6, [sp, #20]
123; NONEON-NOSVE-NEXT:    ldr h7, [sp, #4]
124; NONEON-NOSVE-NEXT:    ldr h3, [sp, #18]
125; NONEON-NOSVE-NEXT:    ldr h4, [sp, #2]
126; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
127; NONEON-NOSVE-NEXT:    ldr h1, [sp]
128; NONEON-NOSVE-NEXT:    fcvt h5, s5
129; NONEON-NOSVE-NEXT:    fcvt s6, h6
130; NONEON-NOSVE-NEXT:    fcvt s7, h7
131; NONEON-NOSVE-NEXT:    fcvt s3, h3
132; NONEON-NOSVE-NEXT:    fcvt s4, h4
133; NONEON-NOSVE-NEXT:    fcvt s0, h0
134; NONEON-NOSVE-NEXT:    fcvt s1, h1
135; NONEON-NOSVE-NEXT:    fcvt s5, h5
136; NONEON-NOSVE-NEXT:    fmul s3, s4, s3
137; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
138; NONEON-NOSVE-NEXT:    fadd s2, s5, s2
139; NONEON-NOSVE-NEXT:    fmul s5, s23, s22
140; NONEON-NOSVE-NEXT:    fcvt h3, s3
141; NONEON-NOSVE-NEXT:    fcvt h0, s0
142; NONEON-NOSVE-NEXT:    fcvt h2, s2
143; NONEON-NOSVE-NEXT:    fcvt h5, s5
144; NONEON-NOSVE-NEXT:    fcvt s3, h3
145; NONEON-NOSVE-NEXT:    fcvt s0, h0
146; NONEON-NOSVE-NEXT:    str h2, [sp, #62]
147; NONEON-NOSVE-NEXT:    ldr h2, [sp, #44]
148; NONEON-NOSVE-NEXT:    fcvt s5, h5
149; NONEON-NOSVE-NEXT:    fcvt s2, h2
150; NONEON-NOSVE-NEXT:    fadd s2, s5, s2
151; NONEON-NOSVE-NEXT:    fmul s5, s21, s20
152; NONEON-NOSVE-NEXT:    fcvt h2, s2
153; NONEON-NOSVE-NEXT:    fcvt h5, s5
154; NONEON-NOSVE-NEXT:    str h2, [sp, #60]
155; NONEON-NOSVE-NEXT:    ldr h2, [sp, #42]
156; NONEON-NOSVE-NEXT:    fcvt s5, h5
157; NONEON-NOSVE-NEXT:    fcvt s2, h2
158; NONEON-NOSVE-NEXT:    fadd s2, s5, s2
159; NONEON-NOSVE-NEXT:    fmul s5, s19, s18
160; NONEON-NOSVE-NEXT:    fcvt h2, s2
161; NONEON-NOSVE-NEXT:    fcvt h5, s5
162; NONEON-NOSVE-NEXT:    str h2, [sp, #58]
163; NONEON-NOSVE-NEXT:    ldr h2, [sp, #40]
164; NONEON-NOSVE-NEXT:    fcvt s5, h5
165; NONEON-NOSVE-NEXT:    fcvt s2, h2
166; NONEON-NOSVE-NEXT:    fadd s2, s5, s2
167; NONEON-NOSVE-NEXT:    fmul s5, s17, s16
168; NONEON-NOSVE-NEXT:    fcvt h2, s2
169; NONEON-NOSVE-NEXT:    fcvt h5, s5
170; NONEON-NOSVE-NEXT:    str h2, [sp, #56]
171; NONEON-NOSVE-NEXT:    ldr h2, [sp, #38]
172; NONEON-NOSVE-NEXT:    fcvt s5, h5
173; NONEON-NOSVE-NEXT:    fcvt s2, h2
174; NONEON-NOSVE-NEXT:    fadd s2, s5, s2
175; NONEON-NOSVE-NEXT:    fmul s5, s7, s6
176; NONEON-NOSVE-NEXT:    fcvt h2, s2
177; NONEON-NOSVE-NEXT:    fcvt h5, s5
178; NONEON-NOSVE-NEXT:    str h2, [sp, #54]
179; NONEON-NOSVE-NEXT:    ldr h2, [sp, #36]
180; NONEON-NOSVE-NEXT:    fcvt s5, h5
181; NONEON-NOSVE-NEXT:    fcvt s2, h2
182; NONEON-NOSVE-NEXT:    fadd s2, s5, s2
183; NONEON-NOSVE-NEXT:    fcvt h2, s2
184; NONEON-NOSVE-NEXT:    str h2, [sp, #52]
185; NONEON-NOSVE-NEXT:    ldr h2, [sp, #34]
186; NONEON-NOSVE-NEXT:    fcvt s2, h2
187; NONEON-NOSVE-NEXT:    fadd s2, s3, s2
188; NONEON-NOSVE-NEXT:    fcvt h1, s2
189; NONEON-NOSVE-NEXT:    str h1, [sp, #50]
190; NONEON-NOSVE-NEXT:    ldr h1, [sp, #32]
191; NONEON-NOSVE-NEXT:    fcvt s1, h1
192; NONEON-NOSVE-NEXT:    fadd s0, s0, s1
193; NONEON-NOSVE-NEXT:    fcvt h0, s0
194; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
195; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
196; NONEON-NOSVE-NEXT:    add sp, sp, #64
197; NONEON-NOSVE-NEXT:    ret
  ; op1 * op2 + op3 as two separate instructions that both allow contraction.
198  %mul = fmul contract <8 x half> %op1, %op2
199  %res = fadd contract <8 x half> %mul, %op3
200  ret <8 x half> %res
201}
202
; <16 x half> multiply-add on vectors loaded from %a, %b and %c; the result is
; stored back through %a. The IR is fmul + fadd, both flagged 'contract'.
; With +sve the expected code treats the 256-bit vectors as two 128-bit halves
; under one "ptrue p0.h, vl8": one half uses FMAD, the other MOVPRFX + FMLA,
; and both results are stored with a single stp to [x0].
; Without +sve the expected code uses a 208-byte frame, processes all sixteen
; lanes in scalar registers (each product is rounded to half before the add),
; and saves/restores d8-d15 because the sequence uses those registers.
203define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
204; CHECK-LABEL: fma_v16f16:
205; CHECK:       // %bb.0:
206; CHECK-NEXT:    ldp q0, q4, [x1]
207; CHECK-NEXT:    ptrue p0.h, vl8
208; CHECK-NEXT:    ldp q1, q5, [x2]
209; CHECK-NEXT:    ldp q2, q3, [x0]
210; CHECK-NEXT:    fmad z0.h, p0/m, z2.h, z1.h
211; CHECK-NEXT:    movprfx z1, z5
212; CHECK-NEXT:    fmla z1.h, p0/m, z3.h, z4.h
213; CHECK-NEXT:    stp q0, q1, [x0]
214; CHECK-NEXT:    ret
215;
216; NONEON-NOSVE-LABEL: fma_v16f16:
217; NONEON-NOSVE:       // %bb.0:
218; NONEON-NOSVE-NEXT:    sub sp, sp, #208
219; NONEON-NOSVE-NEXT:    stp d15, d14, [sp, #144] // 16-byte Folded Spill
220; NONEON-NOSVE-NEXT:    stp d13, d12, [sp, #160] // 16-byte Folded Spill
221; NONEON-NOSVE-NEXT:    stp d11, d10, [sp, #176] // 16-byte Folded Spill
222; NONEON-NOSVE-NEXT:    stp d9, d8, [sp, #192] // 16-byte Folded Spill
223; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 208
224; NONEON-NOSVE-NEXT:    .cfi_offset b8, -8
225; NONEON-NOSVE-NEXT:    .cfi_offset b9, -16
226; NONEON-NOSVE-NEXT:    .cfi_offset b10, -24
227; NONEON-NOSVE-NEXT:    .cfi_offset b11, -32
228; NONEON-NOSVE-NEXT:    .cfi_offset b12, -40
229; NONEON-NOSVE-NEXT:    .cfi_offset b13, -48
230; NONEON-NOSVE-NEXT:    .cfi_offset b14, -56
231; NONEON-NOSVE-NEXT:    .cfi_offset b15, -64
232; NONEON-NOSVE-NEXT:    ldp q0, q1, [x1]
233; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
234; NONEON-NOSVE-NEXT:    ldp q18, q19, [x2]
235; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #16]
236; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #64]
237; NONEON-NOSVE-NEXT:    ldr h24, [sp, #46]
238; NONEON-NOSVE-NEXT:    ldr h25, [sp, #30]
239; NONEON-NOSVE-NEXT:    ldr h0, [sp, #94]
240; NONEON-NOSVE-NEXT:    ldr h1, [sp, #78]
241; NONEON-NOSVE-NEXT:    str q19, [sp, #96]
242; NONEON-NOSVE-NEXT:    str q18, [sp, #48]
243; NONEON-NOSVE-NEXT:    ldr h18, [sp, #110]
244; NONEON-NOSVE-NEXT:    ldr h15, [sp, #92]
245; NONEON-NOSVE-NEXT:    fcvt s20, h0
246; NONEON-NOSVE-NEXT:    fcvt s21, h1
247; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
248; NONEON-NOSVE-NEXT:    fcvt s18, h18
249; NONEON-NOSVE-NEXT:    ldr h13, [sp, #90]
250; NONEON-NOSVE-NEXT:    ldr h14, [sp, #74]
251; NONEON-NOSVE-NEXT:    str h0, [sp, #14] // 2-byte Folded Spill
252; NONEON-NOSVE-NEXT:    ldr h0, [sp, #76]
253; NONEON-NOSVE-NEXT:    ldr h11, [sp, #88]
254; NONEON-NOSVE-NEXT:    ldr h12, [sp, #72]
255; NONEON-NOSVE-NEXT:    ldr h9, [sp, #86]
256; NONEON-NOSVE-NEXT:    ldr h10, [sp, #70]
257; NONEON-NOSVE-NEXT:    fmul s30, s21, s20
258; NONEON-NOSVE-NEXT:    fcvt s0, h0
259; NONEON-NOSVE-NEXT:    ldr h31, [sp, #84]
260; NONEON-NOSVE-NEXT:    ldr h8, [sp, #68]
261; NONEON-NOSVE-NEXT:    ldr h28, [sp, #82]
262; NONEON-NOSVE-NEXT:    ldr h29, [sp, #66]
263; NONEON-NOSVE-NEXT:    ldr h26, [sp, #80]
264; NONEON-NOSVE-NEXT:    ldr h27, [sp, #64]
265; NONEON-NOSVE-NEXT:    ldr h22, [sp, #44]
266; NONEON-NOSVE-NEXT:    ldr h23, [sp, #28]
267; NONEON-NOSVE-NEXT:    ldr h20, [sp, #42]
268; NONEON-NOSVE-NEXT:    ldr h21, [sp, #26]
269; NONEON-NOSVE-NEXT:    fcvt h19, s30
270; NONEON-NOSVE-NEXT:    fcvt s30, h15
271; NONEON-NOSVE-NEXT:    ldr h16, [sp, #40]
272; NONEON-NOSVE-NEXT:    ldr h17, [sp, #24]
273; NONEON-NOSVE-NEXT:    ldr h6, [sp, #38]
274; NONEON-NOSVE-NEXT:    ldr h7, [sp, #22]
275; NONEON-NOSVE-NEXT:    fcvt s16, h16
276; NONEON-NOSVE-NEXT:    ldr h4, [sp, #36]
277; NONEON-NOSVE-NEXT:    ldr h5, [sp, #20]
278; NONEON-NOSVE-NEXT:    fcvt s17, h17
279; NONEON-NOSVE-NEXT:    fcvt s6, h6
280; NONEON-NOSVE-NEXT:    fcvt s7, h7
281; NONEON-NOSVE-NEXT:    fcvt s19, h19
282; NONEON-NOSVE-NEXT:    fmul s0, s0, s30
283; NONEON-NOSVE-NEXT:    fcvt s30, h14
284; NONEON-NOSVE-NEXT:    fcvt s4, h4
285; NONEON-NOSVE-NEXT:    fcvt s5, h5
286; NONEON-NOSVE-NEXT:    ldr h2, [sp, #34]
287; NONEON-NOSVE-NEXT:    ldr h3, [sp, #18]
288; NONEON-NOSVE-NEXT:    ldr h1, [sp, #16]
289; NONEON-NOSVE-NEXT:    fmul s16, s17, s16
290; NONEON-NOSVE-NEXT:    fmul s6, s7, s6
291; NONEON-NOSVE-NEXT:    fcvt s2, h2
292; NONEON-NOSVE-NEXT:    fadd s18, s19, s18
293; NONEON-NOSVE-NEXT:    fcvt h0, s0
294; NONEON-NOSVE-NEXT:    fcvt s19, h13
295; NONEON-NOSVE-NEXT:    fmul s4, s5, s4
296; NONEON-NOSVE-NEXT:    fcvt s3, h3
297; NONEON-NOSVE-NEXT:    fcvt s1, h1
298; NONEON-NOSVE-NEXT:    ldp d15, d14, [sp, #144] // 16-byte Folded Reload
299; NONEON-NOSVE-NEXT:    fcvt h16, s16
300; NONEON-NOSVE-NEXT:    fcvt h6, s6
301; NONEON-NOSVE-NEXT:    fcvt h18, s18
302; NONEON-NOSVE-NEXT:    fcvt s0, h0
303; NONEON-NOSVE-NEXT:    fcvt h4, s4
304; NONEON-NOSVE-NEXT:    fmul s2, s3, s2
305; NONEON-NOSVE-NEXT:    ldr h3, [sp, #14] // 2-byte Folded Reload
306; NONEON-NOSVE-NEXT:    fcvt s16, h16
307; NONEON-NOSVE-NEXT:    fcvt s6, h6
308; NONEON-NOSVE-NEXT:    fcvt s3, h3
309; NONEON-NOSVE-NEXT:    str h18, [sp, #142]
310; NONEON-NOSVE-NEXT:    ldr h18, [sp, #108]
311; NONEON-NOSVE-NEXT:    fcvt s4, h4
312; NONEON-NOSVE-NEXT:    fcvt h2, s2
313; NONEON-NOSVE-NEXT:    fcvt s18, h18
314; NONEON-NOSVE-NEXT:    fmul s1, s1, s3
315; NONEON-NOSVE-NEXT:    fcvt s2, h2
316; NONEON-NOSVE-NEXT:    fadd s0, s0, s18
317; NONEON-NOSVE-NEXT:    fmul s18, s30, s19
318; NONEON-NOSVE-NEXT:    fcvt s19, h11
319; NONEON-NOSVE-NEXT:    fcvt s30, h12
320; NONEON-NOSVE-NEXT:    fcvt h1, s1
321; NONEON-NOSVE-NEXT:    ldp d13, d12, [sp, #160] // 16-byte Folded Reload
322; NONEON-NOSVE-NEXT:    fcvt h0, s0
323; NONEON-NOSVE-NEXT:    fcvt h18, s18
324; NONEON-NOSVE-NEXT:    fcvt s1, h1
325; NONEON-NOSVE-NEXT:    str h0, [sp, #140]
326; NONEON-NOSVE-NEXT:    ldr h0, [sp, #106]
327; NONEON-NOSVE-NEXT:    fcvt s18, h18
328; NONEON-NOSVE-NEXT:    fcvt s0, h0
329; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
330; NONEON-NOSVE-NEXT:    fmul s18, s30, s19
331; NONEON-NOSVE-NEXT:    fcvt s19, h9
332; NONEON-NOSVE-NEXT:    fcvt s30, h10
333; NONEON-NOSVE-NEXT:    ldp d11, d10, [sp, #176] // 16-byte Folded Reload
334; NONEON-NOSVE-NEXT:    fcvt h0, s0
335; NONEON-NOSVE-NEXT:    fcvt h18, s18
336; NONEON-NOSVE-NEXT:    str h0, [sp, #138]
337; NONEON-NOSVE-NEXT:    ldr h0, [sp, #104]
338; NONEON-NOSVE-NEXT:    fcvt s18, h18
339; NONEON-NOSVE-NEXT:    fcvt s0, h0
340; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
341; NONEON-NOSVE-NEXT:    fmul s18, s30, s19
342; NONEON-NOSVE-NEXT:    fcvt s19, h31
343; NONEON-NOSVE-NEXT:    fcvt s30, h8
344; NONEON-NOSVE-NEXT:    ldp d9, d8, [sp, #192] // 16-byte Folded Reload
345; NONEON-NOSVE-NEXT:    fcvt h0, s0
346; NONEON-NOSVE-NEXT:    fcvt h18, s18
347; NONEON-NOSVE-NEXT:    str h0, [sp, #136]
348; NONEON-NOSVE-NEXT:    ldr h0, [sp, #102]
349; NONEON-NOSVE-NEXT:    fcvt s18, h18
350; NONEON-NOSVE-NEXT:    fcvt s0, h0
351; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
352; NONEON-NOSVE-NEXT:    fmul s18, s30, s19
353; NONEON-NOSVE-NEXT:    fcvt s19, h28
354; NONEON-NOSVE-NEXT:    fcvt s28, h29
355; NONEON-NOSVE-NEXT:    fcvt h0, s0
356; NONEON-NOSVE-NEXT:    fcvt h18, s18
357; NONEON-NOSVE-NEXT:    str h0, [sp, #134]
358; NONEON-NOSVE-NEXT:    ldr h0, [sp, #100]
359; NONEON-NOSVE-NEXT:    fcvt s18, h18
360; NONEON-NOSVE-NEXT:    fcvt s0, h0
361; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
362; NONEON-NOSVE-NEXT:    fmul s18, s28, s19
363; NONEON-NOSVE-NEXT:    fcvt s19, h26
364; NONEON-NOSVE-NEXT:    fcvt s26, h27
365; NONEON-NOSVE-NEXT:    fcvt h0, s0
366; NONEON-NOSVE-NEXT:    fcvt h18, s18
367; NONEON-NOSVE-NEXT:    str h0, [sp, #132]
368; NONEON-NOSVE-NEXT:    ldr h0, [sp, #98]
369; NONEON-NOSVE-NEXT:    fcvt s18, h18
370; NONEON-NOSVE-NEXT:    fcvt s0, h0
371; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
372; NONEON-NOSVE-NEXT:    fmul s18, s26, s19
373; NONEON-NOSVE-NEXT:    fcvt s19, h24
374; NONEON-NOSVE-NEXT:    fcvt s24, h25
375; NONEON-NOSVE-NEXT:    fcvt h0, s0
376; NONEON-NOSVE-NEXT:    fcvt h18, s18
377; NONEON-NOSVE-NEXT:    str h0, [sp, #130]
378; NONEON-NOSVE-NEXT:    ldr h0, [sp, #96]
379; NONEON-NOSVE-NEXT:    fcvt s18, h18
380; NONEON-NOSVE-NEXT:    fcvt s0, h0
381; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
382; NONEON-NOSVE-NEXT:    fmul s18, s24, s19
383; NONEON-NOSVE-NEXT:    fcvt s19, h22
384; NONEON-NOSVE-NEXT:    fcvt s22, h23
385; NONEON-NOSVE-NEXT:    fcvt h0, s0
386; NONEON-NOSVE-NEXT:    fcvt h18, s18
387; NONEON-NOSVE-NEXT:    str h0, [sp, #128]
388; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
389; NONEON-NOSVE-NEXT:    fcvt s18, h18
390; NONEON-NOSVE-NEXT:    fcvt s0, h0
391; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
392; NONEON-NOSVE-NEXT:    fmul s18, s22, s19
393; NONEON-NOSVE-NEXT:    fcvt s19, h20
394; NONEON-NOSVE-NEXT:    fcvt s20, h21
395; NONEON-NOSVE-NEXT:    fcvt h0, s0
396; NONEON-NOSVE-NEXT:    fcvt h18, s18
397; NONEON-NOSVE-NEXT:    str h0, [sp, #126]
398; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
399; NONEON-NOSVE-NEXT:    fcvt s18, h18
400; NONEON-NOSVE-NEXT:    fcvt s0, h0
401; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
402; NONEON-NOSVE-NEXT:    fmul s18, s20, s19
403; NONEON-NOSVE-NEXT:    fcvt h0, s0
404; NONEON-NOSVE-NEXT:    fcvt h18, s18
405; NONEON-NOSVE-NEXT:    str h0, [sp, #124]
406; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
407; NONEON-NOSVE-NEXT:    fcvt s18, h18
408; NONEON-NOSVE-NEXT:    fcvt s0, h0
409; NONEON-NOSVE-NEXT:    fadd s0, s18, s0
410; NONEON-NOSVE-NEXT:    fcvt h0, s0
411; NONEON-NOSVE-NEXT:    str h0, [sp, #122]
412; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
413; NONEON-NOSVE-NEXT:    fcvt s0, h0
414; NONEON-NOSVE-NEXT:    fadd s0, s16, s0
415; NONEON-NOSVE-NEXT:    fcvt h0, s0
416; NONEON-NOSVE-NEXT:    str h0, [sp, #120]
417; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
418; NONEON-NOSVE-NEXT:    fcvt s0, h0
419; NONEON-NOSVE-NEXT:    fadd s0, s6, s0
420; NONEON-NOSVE-NEXT:    fcvt h0, s0
421; NONEON-NOSVE-NEXT:    str h0, [sp, #118]
422; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
423; NONEON-NOSVE-NEXT:    fcvt s0, h0
424; NONEON-NOSVE-NEXT:    fadd s0, s4, s0
425; NONEON-NOSVE-NEXT:    fcvt h0, s0
426; NONEON-NOSVE-NEXT:    str h0, [sp, #116]
427; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
428; NONEON-NOSVE-NEXT:    fcvt s0, h0
429; NONEON-NOSVE-NEXT:    fadd s0, s2, s0
430; NONEON-NOSVE-NEXT:    fcvt h0, s0
431; NONEON-NOSVE-NEXT:    str h0, [sp, #114]
432; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
433; NONEON-NOSVE-NEXT:    fcvt s0, h0
434; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
435; NONEON-NOSVE-NEXT:    fcvt h0, s0
436; NONEON-NOSVE-NEXT:    str h0, [sp, #112]
437; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #112]
438; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
439; NONEON-NOSVE-NEXT:    add sp, sp, #208
440; NONEON-NOSVE-NEXT:    ret
  ; Load all three operands, compute op1 * op2 + op3 with contraction allowed
  ; on both instructions, and overwrite the first operand's memory.
441  %op1 = load <16 x half>, ptr %a
442  %op2 = load <16 x half>, ptr %b
443  %op3 = load <16 x half>, ptr %c
444  %mul = fmul contract <16 x half> %op1, %op2
445  %res = fadd contract <16 x half> %mul, %op3
446  store <16 x half> %res, ptr %a
447  ret void
448}
449
; <2 x float> multiply-add written as fmul + fadd, both flagged 'contract'.
; With +sve the expected code is one predicated FMAD over two .s lanes.
; Without +sve the expected code stores the arguments to a 32-byte stack area,
; reloads the lanes as scalar s registers and issues one scalar FMADD per
; lane, so the multiply and add are still fused.
450define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) {
451; CHECK-LABEL: fma_v2f32:
452; CHECK:       // %bb.0:
453; CHECK-NEXT:    ptrue p0.s, vl2
454; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
455; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
456; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
457; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
458; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
459; CHECK-NEXT:    ret
460;
461; NONEON-NOSVE-LABEL: fma_v2f32:
462; NONEON-NOSVE:       // %bb.0:
463; NONEON-NOSVE-NEXT:    sub sp, sp, #32
464; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
465; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #8]
466; NONEON-NOSVE-NEXT:    str d0, [sp]
467; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #8]
468; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp]
469; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
470; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
471; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
472; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
473; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #24]
474; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
475; NONEON-NOSVE-NEXT:    add sp, sp, #32
476; NONEON-NOSVE-NEXT:    ret
  ; op1 * op2 + op3 as two separate instructions that both allow contraction.
477  %mul = fmul contract <2 x float> %op1, %op2
478  %res = fadd contract <2 x float> %mul, %op3
479  ret <2 x float> %res
480}
481
; <4 x float> multiply-add written as fmul + fadd, both flagged 'contract'.
; With +sve the expected code is one predicated FMAD over four .s lanes.
; Without +sve the expected code stores the three 128-bit arguments to a
; 64-byte stack area and issues four scalar FMADDs, writing the results back
; to the stack in pairs before reloading them as q0.
482define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) {
483; CHECK-LABEL: fma_v4f32:
484; CHECK:       // %bb.0:
485; CHECK-NEXT:    ptrue p0.s, vl4
486; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
487; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
488; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
489; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
490; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
491; CHECK-NEXT:    ret
492;
493; NONEON-NOSVE-LABEL: fma_v4f32:
494; NONEON-NOSVE:       // %bb.0:
495; NONEON-NOSVE-NEXT:    sub sp, sp, #64
496; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
497; NONEON-NOSVE-NEXT:    stp q1, q2, [sp, #16]
498; NONEON-NOSVE-NEXT:    str q0, [sp]
499; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #24]
500; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
501; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
502; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
503; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
504; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
505; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #16]
506; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp]
507; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #56]
508; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
509; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
510; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
511; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
512; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #48]
513; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
514; NONEON-NOSVE-NEXT:    add sp, sp, #64
515; NONEON-NOSVE-NEXT:    ret
  ; op1 * op2 + op3 as two separate instructions that both allow contraction.
516  %mul = fmul contract <4 x float> %op1, %op2
517  %res = fadd contract <4 x float> %mul, %op3
518  ret <4 x float> %res
519}
520
; <8 x float> multiply-add on vectors loaded from %a, %b and %c; the result is
; stored back through %a. The IR is fmul + fadd, both flagged 'contract'.
; With +sve the expected code treats the 256-bit vectors as two 128-bit halves
; under one "ptrue p0.s, vl4": one half uses FMAD, the other MOVPRFX + FMLA.
; Without +sve the expected code copies the operands into a 128-byte stack
; area and issues eight scalar FMADDs before storing the result to [x0].
521define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
522; CHECK-LABEL: fma_v8f32:
523; CHECK:       // %bb.0:
524; CHECK-NEXT:    ldp q0, q4, [x1]
525; CHECK-NEXT:    ptrue p0.s, vl4
526; CHECK-NEXT:    ldp q1, q5, [x2]
527; CHECK-NEXT:    ldp q2, q3, [x0]
528; CHECK-NEXT:    fmad z0.s, p0/m, z2.s, z1.s
529; CHECK-NEXT:    movprfx z1, z5
530; CHECK-NEXT:    fmla z1.s, p0/m, z3.s, z4.s
531; CHECK-NEXT:    stp q0, q1, [x0]
532; CHECK-NEXT:    ret
533;
534; NONEON-NOSVE-LABEL: fma_v8f32:
535; NONEON-NOSVE:       // %bb.0:
536; NONEON-NOSVE-NEXT:    sub sp, sp, #128
537; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
538; NONEON-NOSVE-NEXT:    ldp q1, q0, [x2]
539; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
540; NONEON-NOSVE-NEXT:    ldp q4, q5, [x0]
541; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #64]
542; NONEON-NOSVE-NEXT:    stp q4, q2, [sp]
543; NONEON-NOSVE-NEXT:    ldr s0, [sp, #92]
544; NONEON-NOSVE-NEXT:    stp q1, q5, [sp, #32]
545; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #72]
546; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #56]
547; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
548; NONEON-NOSVE-NEXT:    ldr s0, [sp, #88]
549; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
550; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #64]
551; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #48]
552; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #120]
553; NONEON-NOSVE-NEXT:    ldr s0, [sp, #84]
554; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
555; NONEON-NOSVE-NEXT:    ldr s0, [sp, #80]
556; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
557; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #24]
558; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
559; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #112]
560; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
561; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
562; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
563; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
564; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #16]
565; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp]
566; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #104]
567; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
568; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
569; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
570; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
571; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #96]
572; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
573; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
574; NONEON-NOSVE-NEXT:    add sp, sp, #128
575; NONEON-NOSVE-NEXT:    ret
  ; Load all three operands, compute op1 * op2 + op3 with contraction allowed
  ; on both instructions, and overwrite the first operand's memory.
576  %op1 = load <8 x float>, ptr %a
577  %op2 = load <8 x float>, ptr %b
578  %op3 = load <8 x float>, ptr %c
579  %mul = fmul contract <8 x float> %op1, %op2
580  %res = fadd contract <8 x float> %mul, %op3
581  store <8 x float> %res, ptr %a
582  ret void
583}
584
; <1 x double> multiply-add written as fmul + fadd, both flagged 'contract'.
; Both configurations expect a single scalar "fmadd d0, d0, d1, d2" and no SVE
; instruction. The configuration without +sve additionally expects the result
; to be stored to and reloaded from a 16-byte stack slot before returning.
585define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double> %op3) {
586; CHECK-LABEL: fma_v1f64:
587; CHECK:       // %bb.0:
588; CHECK-NEXT:    fmadd d0, d0, d1, d2
589; CHECK-NEXT:    ret
590;
591; NONEON-NOSVE-LABEL: fma_v1f64:
592; NONEON-NOSVE:       // %bb.0:
593; NONEON-NOSVE-NEXT:    sub sp, sp, #16
594; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
595; NONEON-NOSVE-NEXT:    fmadd d0, d0, d1, d2
596; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
597; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
598; NONEON-NOSVE-NEXT:    add sp, sp, #16
599; NONEON-NOSVE-NEXT:    ret
  ; op1 * op2 + op3 as two separate instructions that both allow contraction.
600  %mul = fmul contract <1 x double> %op1, %op2
601  %res = fadd contract <1 x double> %mul, %op3
602  ret <1 x double> %res
603}
604
; <2 x double> multiply-add written as fmul + fadd, both flagged 'contract'.
; With +sve the expected code is one predicated FMAD over two .d lanes.
; Without +sve the expected code stores the three 128-bit arguments to a
; 64-byte stack area, reloads the lanes as scalar d registers and issues one
; scalar FMADD per lane.
605define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) {
606; CHECK-LABEL: fma_v2f64:
607; CHECK:       // %bb.0:
608; CHECK-NEXT:    ptrue p0.d, vl2
609; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
610; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
611; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
612; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
613; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
614; CHECK-NEXT:    ret
615;
616; NONEON-NOSVE-LABEL: fma_v2f64:
617; NONEON-NOSVE:       // %bb.0:
618; NONEON-NOSVE-NEXT:    sub sp, sp, #64
619; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
620; NONEON-NOSVE-NEXT:    stp q1, q2, [sp, #16]
621; NONEON-NOSVE-NEXT:    str q0, [sp]
622; NONEON-NOSVE-NEXT:    ldp d1, d3, [sp, #16]
623; NONEON-NOSVE-NEXT:    ldp d2, d4, [sp]
624; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
625; NONEON-NOSVE-NEXT:    fmadd d5, d4, d3, d0
626; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
627; NONEON-NOSVE-NEXT:    fmadd d0, d2, d1, d0
628; NONEON-NOSVE-NEXT:    stp d0, d5, [sp, #48]
629; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
630; NONEON-NOSVE-NEXT:    add sp, sp, #64
631; NONEON-NOSVE-NEXT:    ret
  ; op1 * op2 + op3 as two separate instructions that both allow contraction.
632  %mul = fmul contract <2 x double> %op1, %op2
633  %res = fadd contract <2 x double> %mul, %op3
634  ret <2 x double> %res
635}
636
; <4 x double> multiply-add on vectors loaded from %a, %b and %c; the result is
; stored back through %a. The IR is fmul + fadd, both flagged 'contract'.
; With +sve the expected code treats the 256-bit vectors as two 128-bit halves
; under one "ptrue p0.d, vl2": one half uses FMAD, the other MOVPRFX + FMLA.
; Without +sve the expected code copies the operands into a 128-byte stack
; area and issues four scalar FMADDs before storing the result to [x0].
637define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
638; CHECK-LABEL: fma_v4f64:
639; CHECK:       // %bb.0:
640; CHECK-NEXT:    ldp q0, q4, [x1]
641; CHECK-NEXT:    ptrue p0.d, vl2
642; CHECK-NEXT:    ldp q1, q5, [x2]
643; CHECK-NEXT:    ldp q2, q3, [x0]
644; CHECK-NEXT:    fmad z0.d, p0/m, z2.d, z1.d
645; CHECK-NEXT:    movprfx z1, z5
646; CHECK-NEXT:    fmla z1.d, p0/m, z3.d, z4.d
647; CHECK-NEXT:    stp q0, q1, [x0]
648; CHECK-NEXT:    ret
649;
650; NONEON-NOSVE-LABEL: fma_v4f64:
651; NONEON-NOSVE:       // %bb.0:
652; NONEON-NOSVE-NEXT:    sub sp, sp, #128
653; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
654; NONEON-NOSVE-NEXT:    ldp q1, q0, [x2]
655; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
656; NONEON-NOSVE-NEXT:    ldp q4, q5, [x0]
657; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #64]
658; NONEON-NOSVE-NEXT:    stp q4, q2, [sp]
659; NONEON-NOSVE-NEXT:    ldr d0, [sp, #88]
660; NONEON-NOSVE-NEXT:    stp q1, q5, [sp, #32]
661; NONEON-NOSVE-NEXT:    ldp d1, d3, [sp, #64]
662; NONEON-NOSVE-NEXT:    ldp d2, d4, [sp, #48]
663; NONEON-NOSVE-NEXT:    fmadd d5, d4, d3, d0
664; NONEON-NOSVE-NEXT:    ldr d0, [sp, #80]
665; NONEON-NOSVE-NEXT:    fmadd d0, d2, d1, d0
666; NONEON-NOSVE-NEXT:    ldp d1, d3, [sp, #16]
667; NONEON-NOSVE-NEXT:    ldp d2, d4, [sp]
668; NONEON-NOSVE-NEXT:    stp d0, d5, [sp, #112]
669; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
670; NONEON-NOSVE-NEXT:    fmadd d5, d4, d3, d0
671; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
672; NONEON-NOSVE-NEXT:    fmadd d0, d2, d1, d0
673; NONEON-NOSVE-NEXT:    stp d0, d5, [sp, #96]
674; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
675; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
676; NONEON-NOSVE-NEXT:    add sp, sp, #128
677; NONEON-NOSVE-NEXT:    ret
  ; Load all three operands, compute op1 * op2 + op3 with contraction allowed
  ; on both instructions, and overwrite the first operand's memory.
678  %op1 = load <4 x double>, ptr %a
679  %op2 = load <4 x double>, ptr %b
680  %op3 = load <4 x double>, ptr %c
681  %mul = fmul contract <4 x double> %op1, %op2
682  %res = fadd contract <4 x double> %mul, %op3
683  store <4 x double> %res, ptr %a
684  ret void
685}
686