xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll (revision 8e0cd7382adacd8bc1741dc26bc0be6bdf8e238a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
3; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6target triple = "aarch64-unknown-linux-gnu"
7
8;
9; FADD
10;
11
12define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
; Element-wise fadd of two <2 x half> register arguments; the sum is returned.
; SVE/SME expectation: a single predicated fadd on .h lanes under ptrue vl4.
; No-vector expectation: both operands are spilled to the stack, then four
; half lanes are each widened (fcvt), added in single precision, narrowed and
; stored, and the 64-bit result is finally reloaded into d0.
13; CHECK-LABEL: fadd_v2f16:
14; CHECK:       // %bb.0:
15; CHECK-NEXT:    ptrue p0.h, vl4
16; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
17; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
18; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
19; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
20; CHECK-NEXT:    ret
21;
22; NONEON-NOSVE-LABEL: fadd_v2f16:
23; NONEON-NOSVE:       // %bb.0:
24; NONEON-NOSVE-NEXT:    sub sp, sp, #32
25; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
26; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
27; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
28; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
29; NONEON-NOSVE-NEXT:    fcvt s0, h0
30; NONEON-NOSVE-NEXT:    fcvt s1, h1
31; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
32; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
33; NONEON-NOSVE-NEXT:    fcvt s1, h1
34; NONEON-NOSVE-NEXT:    fcvt h0, s0
35; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
36; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
37; NONEON-NOSVE-NEXT:    fcvt s0, h0
38; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
39; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
40; NONEON-NOSVE-NEXT:    fcvt s1, h1
41; NONEON-NOSVE-NEXT:    fcvt h0, s0
42; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
43; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
44; NONEON-NOSVE-NEXT:    fcvt s0, h0
45; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
46; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
47; NONEON-NOSVE-NEXT:    fcvt s1, h1
48; NONEON-NOSVE-NEXT:    fcvt h0, s0
49; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
50; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
51; NONEON-NOSVE-NEXT:    fcvt s0, h0
52; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
53; NONEON-NOSVE-NEXT:    fcvt h0, s0
54; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
55; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
56; NONEON-NOSVE-NEXT:    add sp, sp, #32
57; NONEON-NOSVE-NEXT:    ret
58  %res = fadd <2 x half> %op1, %op2
59  ret <2 x half> %res
60}
61
62define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
; Element-wise fadd of two <4 x half> register arguments; the sum is returned.
; SVE/SME expectation: a single predicated fadd on .h lanes under ptrue vl4.
; No-vector expectation: both operands are spilled to the stack, then each of
; the four half lanes is widened (fcvt), added in single precision, narrowed
; and stored, and the 64-bit result is finally reloaded into d0.
63; CHECK-LABEL: fadd_v4f16:
64; CHECK:       // %bb.0:
65; CHECK-NEXT:    ptrue p0.h, vl4
66; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
67; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
68; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
69; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
70; CHECK-NEXT:    ret
71;
72; NONEON-NOSVE-LABEL: fadd_v4f16:
73; NONEON-NOSVE:       // %bb.0:
74; NONEON-NOSVE-NEXT:    sub sp, sp, #32
75; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
76; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
77; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
78; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
79; NONEON-NOSVE-NEXT:    fcvt s0, h0
80; NONEON-NOSVE-NEXT:    fcvt s1, h1
81; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
82; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
83; NONEON-NOSVE-NEXT:    fcvt s1, h1
84; NONEON-NOSVE-NEXT:    fcvt h0, s0
85; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
86; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
87; NONEON-NOSVE-NEXT:    fcvt s0, h0
88; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
89; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
90; NONEON-NOSVE-NEXT:    fcvt s1, h1
91; NONEON-NOSVE-NEXT:    fcvt h0, s0
92; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
93; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
94; NONEON-NOSVE-NEXT:    fcvt s0, h0
95; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
96; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
97; NONEON-NOSVE-NEXT:    fcvt s1, h1
98; NONEON-NOSVE-NEXT:    fcvt h0, s0
99; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
100; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
101; NONEON-NOSVE-NEXT:    fcvt s0, h0
102; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
103; NONEON-NOSVE-NEXT:    fcvt h0, s0
104; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
105; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
106; NONEON-NOSVE-NEXT:    add sp, sp, #32
107; NONEON-NOSVE-NEXT:    ret
108  %res = fadd <4 x half> %op1, %op2
109  ret <4 x half> %res
110}
111
112define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
; Element-wise fadd of two <8 x half> register arguments; the sum is returned.
; SVE/SME expectation: a single predicated fadd on .h lanes under ptrue vl8.
; No-vector expectation: q0/q1 are pushed with a pre-indexed stp (48-byte
; frame), each of the eight half lanes is widened, added in single precision
; and narrowed, and the 128-bit result is reloaded into q0 from sp+32.
113; CHECK-LABEL: fadd_v8f16:
114; CHECK:       // %bb.0:
115; CHECK-NEXT:    ptrue p0.h, vl8
116; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
117; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
118; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
119; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
120; CHECK-NEXT:    ret
121;
122; NONEON-NOSVE-LABEL: fadd_v8f16:
123; NONEON-NOSVE:       // %bb.0:
124; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
125; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
126; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
127; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
128; NONEON-NOSVE-NEXT:    fcvt s0, h0
129; NONEON-NOSVE-NEXT:    fcvt s1, h1
130; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
131; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
132; NONEON-NOSVE-NEXT:    fcvt s1, h1
133; NONEON-NOSVE-NEXT:    fcvt h0, s0
134; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
135; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
136; NONEON-NOSVE-NEXT:    fcvt s0, h0
137; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
138; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
139; NONEON-NOSVE-NEXT:    fcvt s1, h1
140; NONEON-NOSVE-NEXT:    fcvt h0, s0
141; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
142; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
143; NONEON-NOSVE-NEXT:    fcvt s0, h0
144; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
145; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
146; NONEON-NOSVE-NEXT:    fcvt s1, h1
147; NONEON-NOSVE-NEXT:    fcvt h0, s0
148; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
149; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
150; NONEON-NOSVE-NEXT:    fcvt s0, h0
151; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
152; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
153; NONEON-NOSVE-NEXT:    fcvt s1, h1
154; NONEON-NOSVE-NEXT:    fcvt h0, s0
155; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
156; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
157; NONEON-NOSVE-NEXT:    fcvt s0, h0
158; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
159; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
160; NONEON-NOSVE-NEXT:    fcvt s1, h1
161; NONEON-NOSVE-NEXT:    fcvt h0, s0
162; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
163; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
164; NONEON-NOSVE-NEXT:    fcvt s0, h0
165; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
166; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
167; NONEON-NOSVE-NEXT:    fcvt s1, h1
168; NONEON-NOSVE-NEXT:    fcvt h0, s0
169; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
170; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
171; NONEON-NOSVE-NEXT:    fcvt s0, h0
172; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
173; NONEON-NOSVE-NEXT:    ldr h1, [sp]
174; NONEON-NOSVE-NEXT:    fcvt s1, h1
175; NONEON-NOSVE-NEXT:    fcvt h0, s0
176; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
177; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
178; NONEON-NOSVE-NEXT:    fcvt s0, h0
179; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
180; NONEON-NOSVE-NEXT:    fcvt h0, s0
181; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
182; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
183; NONEON-NOSVE-NEXT:    add sp, sp, #48
184; NONEON-NOSVE-NEXT:    ret
185  %res = fadd <8 x half> %op1, %op2
186  ret <8 x half> %res
187}
188
189define void @fadd_v16f16(ptr %a, ptr %b) {
; Loads <16 x half> from %a and from %b, adds them element-wise and stores the
; sum back through %a; nothing is returned.
; SVE/SME expectation: the 256-bit operands are handled as two 128-bit halves
; (ldp/stp of q registers) with one predicated fadd per half under ptrue vl8;
; the second fadd is preceded by a movprfx.
; No-vector expectation: both operands are copied into a 96-byte stack frame,
; all sixteen half lanes go through fcvt / scalar fadd / fcvt, and the result
; built at sp+64 is reloaded with ldp and stored to [x0].
190; CHECK-LABEL: fadd_v16f16:
191; CHECK:       // %bb.0:
192; CHECK-NEXT:    ldp q0, q3, [x1]
193; CHECK-NEXT:    ptrue p0.h, vl8
194; CHECK-NEXT:    ldp q1, q2, [x0]
195; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
196; CHECK-NEXT:    movprfx z1, z2
197; CHECK-NEXT:    fadd z1.h, p0/m, z1.h, z3.h
198; CHECK-NEXT:    stp q0, q1, [x0]
199; CHECK-NEXT:    ret
200;
201; NONEON-NOSVE-LABEL: fadd_v16f16:
202; NONEON-NOSVE:       // %bb.0:
203; NONEON-NOSVE-NEXT:    sub sp, sp, #96
204; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
205; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
206; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
207; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
208; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
209; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
210; NONEON-NOSVE-NEXT:    ldr h1, [sp, #46]
211; NONEON-NOSVE-NEXT:    fcvt s0, h0
212; NONEON-NOSVE-NEXT:    fcvt s1, h1
213; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
214; NONEON-NOSVE-NEXT:    ldr h1, [sp, #44]
215; NONEON-NOSVE-NEXT:    fcvt s1, h1
216; NONEON-NOSVE-NEXT:    fcvt h0, s0
217; NONEON-NOSVE-NEXT:    str h0, [sp, #94]
218; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
219; NONEON-NOSVE-NEXT:    fcvt s0, h0
220; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
221; NONEON-NOSVE-NEXT:    ldr h1, [sp, #42]
222; NONEON-NOSVE-NEXT:    fcvt s1, h1
223; NONEON-NOSVE-NEXT:    fcvt h0, s0
224; NONEON-NOSVE-NEXT:    str h0, [sp, #92]
225; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
226; NONEON-NOSVE-NEXT:    fcvt s0, h0
227; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
228; NONEON-NOSVE-NEXT:    ldr h1, [sp, #40]
229; NONEON-NOSVE-NEXT:    fcvt s1, h1
230; NONEON-NOSVE-NEXT:    fcvt h0, s0
231; NONEON-NOSVE-NEXT:    str h0, [sp, #90]
232; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
233; NONEON-NOSVE-NEXT:    fcvt s0, h0
234; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
235; NONEON-NOSVE-NEXT:    ldr h1, [sp, #38]
236; NONEON-NOSVE-NEXT:    fcvt s1, h1
237; NONEON-NOSVE-NEXT:    fcvt h0, s0
238; NONEON-NOSVE-NEXT:    str h0, [sp, #88]
239; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
240; NONEON-NOSVE-NEXT:    fcvt s0, h0
241; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
242; NONEON-NOSVE-NEXT:    ldr h1, [sp, #36]
243; NONEON-NOSVE-NEXT:    fcvt s1, h1
244; NONEON-NOSVE-NEXT:    fcvt h0, s0
245; NONEON-NOSVE-NEXT:    str h0, [sp, #86]
246; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
247; NONEON-NOSVE-NEXT:    fcvt s0, h0
248; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
249; NONEON-NOSVE-NEXT:    ldr h1, [sp, #34]
250; NONEON-NOSVE-NEXT:    fcvt s1, h1
251; NONEON-NOSVE-NEXT:    fcvt h0, s0
252; NONEON-NOSVE-NEXT:    str h0, [sp, #84]
253; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
254; NONEON-NOSVE-NEXT:    fcvt s0, h0
255; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
256; NONEON-NOSVE-NEXT:    ldr h1, [sp, #32]
257; NONEON-NOSVE-NEXT:    fcvt s1, h1
258; NONEON-NOSVE-NEXT:    fcvt h0, s0
259; NONEON-NOSVE-NEXT:    str h0, [sp, #82]
260; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
261; NONEON-NOSVE-NEXT:    fcvt s0, h0
262; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
263; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
264; NONEON-NOSVE-NEXT:    fcvt s1, h1
265; NONEON-NOSVE-NEXT:    fcvt h0, s0
266; NONEON-NOSVE-NEXT:    str h0, [sp, #80]
267; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
268; NONEON-NOSVE-NEXT:    fcvt s0, h0
269; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
270; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
271; NONEON-NOSVE-NEXT:    fcvt s1, h1
272; NONEON-NOSVE-NEXT:    fcvt h0, s0
273; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
274; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
275; NONEON-NOSVE-NEXT:    fcvt s0, h0
276; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
277; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
278; NONEON-NOSVE-NEXT:    fcvt s1, h1
279; NONEON-NOSVE-NEXT:    fcvt h0, s0
280; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
281; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
282; NONEON-NOSVE-NEXT:    fcvt s0, h0
283; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
284; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
285; NONEON-NOSVE-NEXT:    fcvt s1, h1
286; NONEON-NOSVE-NEXT:    fcvt h0, s0
287; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
288; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
289; NONEON-NOSVE-NEXT:    fcvt s0, h0
290; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
291; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
292; NONEON-NOSVE-NEXT:    fcvt s1, h1
293; NONEON-NOSVE-NEXT:    fcvt h0, s0
294; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
295; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
296; NONEON-NOSVE-NEXT:    fcvt s0, h0
297; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
298; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
299; NONEON-NOSVE-NEXT:    fcvt s1, h1
300; NONEON-NOSVE-NEXT:    fcvt h0, s0
301; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
302; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
303; NONEON-NOSVE-NEXT:    fcvt s0, h0
304; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
305; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
306; NONEON-NOSVE-NEXT:    fcvt s1, h1
307; NONEON-NOSVE-NEXT:    fcvt h0, s0
308; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
309; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
310; NONEON-NOSVE-NEXT:    fcvt s0, h0
311; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
312; NONEON-NOSVE-NEXT:    ldr h1, [sp]
313; NONEON-NOSVE-NEXT:    fcvt s1, h1
314; NONEON-NOSVE-NEXT:    fcvt h0, s0
315; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
316; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
317; NONEON-NOSVE-NEXT:    fcvt s0, h0
318; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
319; NONEON-NOSVE-NEXT:    fcvt h0, s0
320; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
321; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
322; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
323; NONEON-NOSVE-NEXT:    add sp, sp, #96
324; NONEON-NOSVE-NEXT:    ret
325  %op1 = load <16 x half>, ptr %a
326  %op2 = load <16 x half>, ptr %b
327  %res = fadd <16 x half> %op1, %op2
328  store <16 x half> %res, ptr %a
329  ret void
330}
331
332define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
; Element-wise fadd of two <2 x float> register arguments; the sum is returned.
; SVE/SME expectation: a single predicated fadd on .s lanes under ptrue vl2.
; No-vector expectation: d0/d1 are spilled to the stack, the two lanes are
; added with scalar single-precision fadd (no fcvt is involved), the pair is
; stored with stp and the 64-bit result is reloaded into d0.
333; CHECK-LABEL: fadd_v2f32:
334; CHECK:       // %bb.0:
335; CHECK-NEXT:    ptrue p0.s, vl2
336; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
337; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
338; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
339; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
340; CHECK-NEXT:    ret
341;
342; NONEON-NOSVE-LABEL: fadd_v2f32:
343; NONEON-NOSVE:       // %bb.0:
344; NONEON-NOSVE-NEXT:    sub sp, sp, #32
345; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
346; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
347; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
348; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
349; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
350; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
351; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
352; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #24]
353; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
354; NONEON-NOSVE-NEXT:    add sp, sp, #32
355; NONEON-NOSVE-NEXT:    ret
356  %res = fadd <2 x float> %op1, %op2
357  ret <2 x float> %res
358}
359
360define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
; Element-wise fadd of two <4 x float> register arguments; the sum is returned.
; SVE/SME expectation: a single predicated fadd on .s lanes under ptrue vl4.
; No-vector expectation: q0/q1 are pushed with a pre-indexed stp (48-byte
; frame), the four lanes are added pairwise with scalar fadd, and the 128-bit
; result assembled at sp+32 is reloaded into q0.
361; CHECK-LABEL: fadd_v4f32:
362; CHECK:       // %bb.0:
363; CHECK-NEXT:    ptrue p0.s, vl4
364; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
365; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
366; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
367; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
368; CHECK-NEXT:    ret
369;
370; NONEON-NOSVE-LABEL: fadd_v4f32:
371; NONEON-NOSVE:       // %bb.0:
372; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
373; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
374; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
375; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
376; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
377; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
378; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
379; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
380; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #40]
381; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
382; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
383; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
384; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
385; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #32]
386; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
387; NONEON-NOSVE-NEXT:    add sp, sp, #48
388; NONEON-NOSVE-NEXT:    ret
389  %res = fadd <4 x float> %op1, %op2
390  ret <4 x float> %res
391}
392
393define void @fadd_v8f32(ptr %a, ptr %b) {
; Loads <8 x float> from %a and from %b, adds them element-wise and stores the
; sum back through %a; nothing is returned.
; SVE/SME expectation: two 128-bit halves (ldp/stp of q registers), one
; predicated fadd on .s lanes per half under ptrue vl4, the second one
; preceded by a movprfx.
; No-vector expectation: both operands are copied into a 96-byte stack frame,
; the eight lanes are added with scalar fadd two at a time, and the result
; built at sp+64 is reloaded with ldp and stored to [x0].
394; CHECK-LABEL: fadd_v8f32:
395; CHECK:       // %bb.0:
396; CHECK-NEXT:    ldp q0, q3, [x1]
397; CHECK-NEXT:    ptrue p0.s, vl4
398; CHECK-NEXT:    ldp q1, q2, [x0]
399; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
400; CHECK-NEXT:    movprfx z1, z2
401; CHECK-NEXT:    fadd z1.s, p0/m, z1.s, z3.s
402; CHECK-NEXT:    stp q0, q1, [x0]
403; CHECK-NEXT:    ret
404;
405; NONEON-NOSVE-LABEL: fadd_v8f32:
406; NONEON-NOSVE:       // %bb.0:
407; NONEON-NOSVE-NEXT:    sub sp, sp, #96
408; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
409; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
410; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
411; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
412; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
413; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #40]
414; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
415; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
416; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
417; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
418; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #32]
419; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #88]
420; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
421; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
422; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
423; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
424; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
425; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #80]
426; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
427; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
428; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
429; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
430; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
431; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #72]
432; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
433; NONEON-NOSVE-NEXT:    fadd s3, s2, s0
434; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
435; NONEON-NOSVE-NEXT:    fadd s0, s1, s0
436; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #64]
437; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
438; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
439; NONEON-NOSVE-NEXT:    add sp, sp, #96
440; NONEON-NOSVE-NEXT:    ret
441  %op1 = load <8 x float>, ptr %a
442  %op2 = load <8 x float>, ptr %b
443  %res = fadd <8 x float> %op1, %op2
444  store <8 x float> %res, ptr %a
445  ret void
446}
447
448define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
; Element-wise fadd of two <2 x double> register arguments; the sum is
; returned.
; SVE/SME expectation: a single predicated fadd on .d lanes under ptrue vl2.
; No-vector expectation: q0/q1 are pushed with a pre-indexed stp (48-byte
; frame), the two lanes are added with scalar double-precision fadd, and the
; 128-bit result stored at sp+32 is reloaded into q0.
449; CHECK-LABEL: fadd_v2f64:
450; CHECK:       // %bb.0:
451; CHECK-NEXT:    ptrue p0.d, vl2
452; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
453; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
454; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
455; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
456; CHECK-NEXT:    ret
457;
458; NONEON-NOSVE-LABEL: fadd_v2f64:
459; NONEON-NOSVE:       // %bb.0:
460; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
461; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
462; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
463; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
464; NONEON-NOSVE-NEXT:    fadd d3, d2, d0
465; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
466; NONEON-NOSVE-NEXT:    fadd d0, d1, d0
467; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #32]
468; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
469; NONEON-NOSVE-NEXT:    add sp, sp, #48
470; NONEON-NOSVE-NEXT:    ret
471  %res = fadd <2 x double> %op1, %op2
472  ret <2 x double> %res
473}
474
475define void @fadd_v4f64(ptr %a, ptr %b) {
; Loads <4 x double> from %a and from %b, adds them element-wise and stores
; the sum back through %a; nothing is returned.
; SVE/SME expectation: two 128-bit halves (ldp/stp of q registers), one
; predicated fadd on .d lanes per half under ptrue vl2, the second one
; preceded by a movprfx.
; No-vector expectation: both operands are copied into a 96-byte stack frame,
; the four lanes are added with scalar double-precision fadd, and the result
; built at sp+64 is reloaded with ldp and stored to [x0].
476; CHECK-LABEL: fadd_v4f64:
477; CHECK:       // %bb.0:
478; CHECK-NEXT:    ldp q0, q3, [x1]
479; CHECK-NEXT:    ptrue p0.d, vl2
480; CHECK-NEXT:    ldp q1, q2, [x0]
481; CHECK-NEXT:    fadd z0.d, p0/m, z0.d, z1.d
482; CHECK-NEXT:    movprfx z1, z2
483; CHECK-NEXT:    fadd z1.d, p0/m, z1.d, z3.d
484; CHECK-NEXT:    stp q0, q1, [x0]
485; CHECK-NEXT:    ret
486;
487; NONEON-NOSVE-LABEL: fadd_v4f64:
488; NONEON-NOSVE:       // %bb.0:
489; NONEON-NOSVE-NEXT:    sub sp, sp, #96
490; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
491; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
492; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
493; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
494; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
495; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #32]
496; NONEON-NOSVE-NEXT:    ldr d0, [sp, #56]
497; NONEON-NOSVE-NEXT:    fadd d3, d2, d0
498; NONEON-NOSVE-NEXT:    ldr d0, [sp, #48]
499; NONEON-NOSVE-NEXT:    fadd d0, d1, d0
500; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
501; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #80]
502; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
503; NONEON-NOSVE-NEXT:    fadd d3, d2, d0
504; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
505; NONEON-NOSVE-NEXT:    fadd d0, d1, d0
506; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #64]
507; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
508; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
509; NONEON-NOSVE-NEXT:    add sp, sp, #96
510; NONEON-NOSVE-NEXT:    ret
511  %op1 = load <4 x double>, ptr %a
512  %op2 = load <4 x double>, ptr %b
513  %res = fadd <4 x double> %op1, %op2
514  store <4 x double> %res, ptr %a
515  ret void
516}
517
518;
519; FDIV
520;
521
522define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
; Element-wise fdiv of %op1 by %op2 (both <2 x half> register arguments); the
; quotient is returned.
; SVE/SME expectation: a single predicated fdiv on .h lanes under ptrue vl4.
; No-vector expectation: both operands are spilled to the stack, then four
; half lanes are each widened (fcvt), divided in single precision, narrowed
; and stored, and the 64-bit result is finally reloaded into d0.
523; CHECK-LABEL: fdiv_v2f16:
524; CHECK:       // %bb.0:
525; CHECK-NEXT:    ptrue p0.h, vl4
526; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
527; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
528; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
529; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
530; CHECK-NEXT:    ret
531;
532; NONEON-NOSVE-LABEL: fdiv_v2f16:
533; NONEON-NOSVE:       // %bb.0:
534; NONEON-NOSVE-NEXT:    sub sp, sp, #32
535; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
536; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
537; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
538; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
539; NONEON-NOSVE-NEXT:    fcvt s0, h0
540; NONEON-NOSVE-NEXT:    fcvt s1, h1
541; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
542; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
543; NONEON-NOSVE-NEXT:    fcvt s1, h1
544; NONEON-NOSVE-NEXT:    fcvt h0, s0
545; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
546; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
547; NONEON-NOSVE-NEXT:    fcvt s0, h0
548; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
549; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
550; NONEON-NOSVE-NEXT:    fcvt s1, h1
551; NONEON-NOSVE-NEXT:    fcvt h0, s0
552; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
553; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
554; NONEON-NOSVE-NEXT:    fcvt s0, h0
555; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
556; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
557; NONEON-NOSVE-NEXT:    fcvt s1, h1
558; NONEON-NOSVE-NEXT:    fcvt h0, s0
559; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
560; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
561; NONEON-NOSVE-NEXT:    fcvt s0, h0
562; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
563; NONEON-NOSVE-NEXT:    fcvt h0, s0
564; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
565; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
566; NONEON-NOSVE-NEXT:    add sp, sp, #32
567; NONEON-NOSVE-NEXT:    ret
568  %res = fdiv <2 x half> %op1, %op2
569  ret <2 x half> %res
570}
571
572define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
; Element-wise fdiv of %op1 by %op2 (both <4 x half> register arguments); the
; quotient is returned.
; SVE/SME expectation: a single predicated fdiv on .h lanes under ptrue vl4.
; No-vector expectation: both operands are spilled to the stack, then each of
; the four half lanes is widened (fcvt), divided in single precision, narrowed
; and stored, and the 64-bit result is finally reloaded into d0.
573; CHECK-LABEL: fdiv_v4f16:
574; CHECK:       // %bb.0:
575; CHECK-NEXT:    ptrue p0.h, vl4
576; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
577; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
578; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
579; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
580; CHECK-NEXT:    ret
581;
582; NONEON-NOSVE-LABEL: fdiv_v4f16:
583; NONEON-NOSVE:       // %bb.0:
584; NONEON-NOSVE-NEXT:    sub sp, sp, #32
585; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
586; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
587; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
588; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
589; NONEON-NOSVE-NEXT:    fcvt s0, h0
590; NONEON-NOSVE-NEXT:    fcvt s1, h1
591; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
592; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
593; NONEON-NOSVE-NEXT:    fcvt s1, h1
594; NONEON-NOSVE-NEXT:    fcvt h0, s0
595; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
596; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
597; NONEON-NOSVE-NEXT:    fcvt s0, h0
598; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
599; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
600; NONEON-NOSVE-NEXT:    fcvt s1, h1
601; NONEON-NOSVE-NEXT:    fcvt h0, s0
602; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
603; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
604; NONEON-NOSVE-NEXT:    fcvt s0, h0
605; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
606; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
607; NONEON-NOSVE-NEXT:    fcvt s1, h1
608; NONEON-NOSVE-NEXT:    fcvt h0, s0
609; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
610; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
611; NONEON-NOSVE-NEXT:    fcvt s0, h0
612; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
613; NONEON-NOSVE-NEXT:    fcvt h0, s0
614; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
615; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
616; NONEON-NOSVE-NEXT:    add sp, sp, #32
617; NONEON-NOSVE-NEXT:    ret
618  %res = fdiv <4 x half> %op1, %op2
619  ret <4 x half> %res
620}
621
622define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
; Element-wise fdiv of %op1 by %op2 (both <8 x half> register arguments); the
; quotient is returned.
; SVE/SME expectation: a single predicated fdiv on .h lanes under ptrue vl8.
; No-vector expectation: q0/q1 are pushed with a pre-indexed stp (48-byte
; frame), each of the eight half lanes is widened, divided in single precision
; and narrowed, and the 128-bit result is reloaded into q0 from sp+32.
623; CHECK-LABEL: fdiv_v8f16:
624; CHECK:       // %bb.0:
625; CHECK-NEXT:    ptrue p0.h, vl8
626; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
627; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
628; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
629; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
630; CHECK-NEXT:    ret
631;
632; NONEON-NOSVE-LABEL: fdiv_v8f16:
633; NONEON-NOSVE:       // %bb.0:
634; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
635; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
636; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
637; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
638; NONEON-NOSVE-NEXT:    fcvt s0, h0
639; NONEON-NOSVE-NEXT:    fcvt s1, h1
640; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
641; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
642; NONEON-NOSVE-NEXT:    fcvt s1, h1
643; NONEON-NOSVE-NEXT:    fcvt h0, s0
644; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
645; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
646; NONEON-NOSVE-NEXT:    fcvt s0, h0
647; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
648; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
649; NONEON-NOSVE-NEXT:    fcvt s1, h1
650; NONEON-NOSVE-NEXT:    fcvt h0, s0
651; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
652; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
653; NONEON-NOSVE-NEXT:    fcvt s0, h0
654; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
655; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
656; NONEON-NOSVE-NEXT:    fcvt s1, h1
657; NONEON-NOSVE-NEXT:    fcvt h0, s0
658; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
659; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
660; NONEON-NOSVE-NEXT:    fcvt s0, h0
661; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
662; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
663; NONEON-NOSVE-NEXT:    fcvt s1, h1
664; NONEON-NOSVE-NEXT:    fcvt h0, s0
665; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
666; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
667; NONEON-NOSVE-NEXT:    fcvt s0, h0
668; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
669; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
670; NONEON-NOSVE-NEXT:    fcvt s1, h1
671; NONEON-NOSVE-NEXT:    fcvt h0, s0
672; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
673; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
674; NONEON-NOSVE-NEXT:    fcvt s0, h0
675; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
676; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
677; NONEON-NOSVE-NEXT:    fcvt s1, h1
678; NONEON-NOSVE-NEXT:    fcvt h0, s0
679; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
680; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
681; NONEON-NOSVE-NEXT:    fcvt s0, h0
682; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
683; NONEON-NOSVE-NEXT:    ldr h1, [sp]
684; NONEON-NOSVE-NEXT:    fcvt s1, h1
685; NONEON-NOSVE-NEXT:    fcvt h0, s0
686; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
687; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
688; NONEON-NOSVE-NEXT:    fcvt s0, h0
689; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
690; NONEON-NOSVE-NEXT:    fcvt h0, s0
691; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
692; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
693; NONEON-NOSVE-NEXT:    add sp, sp, #48
694; NONEON-NOSVE-NEXT:    ret
695  %res = fdiv <8 x half> %op1, %op2
696  ret <8 x half> %res
697}
698
699define void @fdiv_v16f16(ptr %a, ptr %b) {
; Loads <16 x half> from %a and from %b, divides the %a data by the %b data
; element-wise and stores the quotient back through %a; nothing is returned.
; SVE/SME expectation: the 256-bit operands are handled as two 128-bit halves
; under ptrue vl8. The low half is expected as fdivr with the data loaded from
; [x1] in the destination register; the high half is expected as movprfx
; followed by fdiv.
; No-vector expectation: both operands are copied into a 96-byte stack frame,
; all sixteen half lanes go through fcvt / scalar fdiv / fcvt, and the result
; built at sp+64 is reloaded with ldp and stored to [x0].
700; CHECK-LABEL: fdiv_v16f16:
701; CHECK:       // %bb.0:
702; CHECK-NEXT:    ldp q0, q3, [x1]
703; CHECK-NEXT:    ptrue p0.h, vl8
704; CHECK-NEXT:    ldp q1, q2, [x0]
705; CHECK-NEXT:    fdivr z0.h, p0/m, z0.h, z1.h
706; CHECK-NEXT:    movprfx z1, z2
707; CHECK-NEXT:    fdiv z1.h, p0/m, z1.h, z3.h
708; CHECK-NEXT:    stp q0, q1, [x0]
709; CHECK-NEXT:    ret
710;
711; NONEON-NOSVE-LABEL: fdiv_v16f16:
712; NONEON-NOSVE:       // %bb.0:
713; NONEON-NOSVE-NEXT:    sub sp, sp, #96
714; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
715; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
716; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
717; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
718; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
719; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
720; NONEON-NOSVE-NEXT:    ldr h1, [sp, #46]
721; NONEON-NOSVE-NEXT:    fcvt s0, h0
722; NONEON-NOSVE-NEXT:    fcvt s1, h1
723; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
724; NONEON-NOSVE-NEXT:    ldr h1, [sp, #44]
725; NONEON-NOSVE-NEXT:    fcvt s1, h1
726; NONEON-NOSVE-NEXT:    fcvt h0, s0
727; NONEON-NOSVE-NEXT:    str h0, [sp, #94]
728; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
729; NONEON-NOSVE-NEXT:    fcvt s0, h0
730; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
731; NONEON-NOSVE-NEXT:    ldr h1, [sp, #42]
732; NONEON-NOSVE-NEXT:    fcvt s1, h1
733; NONEON-NOSVE-NEXT:    fcvt h0, s0
734; NONEON-NOSVE-NEXT:    str h0, [sp, #92]
735; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
736; NONEON-NOSVE-NEXT:    fcvt s0, h0
737; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
738; NONEON-NOSVE-NEXT:    ldr h1, [sp, #40]
739; NONEON-NOSVE-NEXT:    fcvt s1, h1
740; NONEON-NOSVE-NEXT:    fcvt h0, s0
741; NONEON-NOSVE-NEXT:    str h0, [sp, #90]
742; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
743; NONEON-NOSVE-NEXT:    fcvt s0, h0
744; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
745; NONEON-NOSVE-NEXT:    ldr h1, [sp, #38]
746; NONEON-NOSVE-NEXT:    fcvt s1, h1
747; NONEON-NOSVE-NEXT:    fcvt h0, s0
748; NONEON-NOSVE-NEXT:    str h0, [sp, #88]
749; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
750; NONEON-NOSVE-NEXT:    fcvt s0, h0
751; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
752; NONEON-NOSVE-NEXT:    ldr h1, [sp, #36]
753; NONEON-NOSVE-NEXT:    fcvt s1, h1
754; NONEON-NOSVE-NEXT:    fcvt h0, s0
755; NONEON-NOSVE-NEXT:    str h0, [sp, #86]
756; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
757; NONEON-NOSVE-NEXT:    fcvt s0, h0
758; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
759; NONEON-NOSVE-NEXT:    ldr h1, [sp, #34]
760; NONEON-NOSVE-NEXT:    fcvt s1, h1
761; NONEON-NOSVE-NEXT:    fcvt h0, s0
762; NONEON-NOSVE-NEXT:    str h0, [sp, #84]
763; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
764; NONEON-NOSVE-NEXT:    fcvt s0, h0
765; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
766; NONEON-NOSVE-NEXT:    ldr h1, [sp, #32]
767; NONEON-NOSVE-NEXT:    fcvt s1, h1
768; NONEON-NOSVE-NEXT:    fcvt h0, s0
769; NONEON-NOSVE-NEXT:    str h0, [sp, #82]
770; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
771; NONEON-NOSVE-NEXT:    fcvt s0, h0
772; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
773; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
774; NONEON-NOSVE-NEXT:    fcvt s1, h1
775; NONEON-NOSVE-NEXT:    fcvt h0, s0
776; NONEON-NOSVE-NEXT:    str h0, [sp, #80]
777; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
778; NONEON-NOSVE-NEXT:    fcvt s0, h0
779; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
780; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
781; NONEON-NOSVE-NEXT:    fcvt s1, h1
782; NONEON-NOSVE-NEXT:    fcvt h0, s0
783; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
784; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
785; NONEON-NOSVE-NEXT:    fcvt s0, h0
786; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
787; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
788; NONEON-NOSVE-NEXT:    fcvt s1, h1
789; NONEON-NOSVE-NEXT:    fcvt h0, s0
790; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
791; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
792; NONEON-NOSVE-NEXT:    fcvt s0, h0
793; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
794; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
795; NONEON-NOSVE-NEXT:    fcvt s1, h1
796; NONEON-NOSVE-NEXT:    fcvt h0, s0
797; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
798; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
799; NONEON-NOSVE-NEXT:    fcvt s0, h0
800; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
801; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
802; NONEON-NOSVE-NEXT:    fcvt s1, h1
803; NONEON-NOSVE-NEXT:    fcvt h0, s0
804; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
805; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
806; NONEON-NOSVE-NEXT:    fcvt s0, h0
807; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
808; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
809; NONEON-NOSVE-NEXT:    fcvt s1, h1
810; NONEON-NOSVE-NEXT:    fcvt h0, s0
811; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
812; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
813; NONEON-NOSVE-NEXT:    fcvt s0, h0
814; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
815; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
816; NONEON-NOSVE-NEXT:    fcvt s1, h1
817; NONEON-NOSVE-NEXT:    fcvt h0, s0
818; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
819; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
820; NONEON-NOSVE-NEXT:    fcvt s0, h0
821; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
822; NONEON-NOSVE-NEXT:    ldr h1, [sp]
823; NONEON-NOSVE-NEXT:    fcvt s1, h1
824; NONEON-NOSVE-NEXT:    fcvt h0, s0
825; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
826; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
827; NONEON-NOSVE-NEXT:    fcvt s0, h0
828; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
829; NONEON-NOSVE-NEXT:    fcvt h0, s0
830; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
831; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
832; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
833; NONEON-NOSVE-NEXT:    add sp, sp, #96
834; NONEON-NOSVE-NEXT:    ret
835  %op1 = load <16 x half>, ptr %a
836  %op2 = load <16 x half>, ptr %b
837  %res = fdiv <16 x half> %op1, %op2
838  store <16 x half> %res, ptr %a
839  ret void
840}
841
; fdiv of two <2 x float> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated SVE fdiv
; under a "ptrue p0.s, vl2" predicate. For the llc invocation that passes no
; -mattr, both operands are spilled to a 32-byte stack frame and divided lane
; by lane with two scalar fdiv instructions before the result is reloaded.
842define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
843; CHECK-LABEL: fdiv_v2f32:
844; CHECK:       // %bb.0:
845; CHECK-NEXT:    ptrue p0.s, vl2
846; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
847; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
848; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
849; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
850; CHECK-NEXT:    ret
851;
852; NONEON-NOSVE-LABEL: fdiv_v2f32:
853; NONEON-NOSVE:       // %bb.0:
854; NONEON-NOSVE-NEXT:    sub sp, sp, #32
855; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
856; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
857; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
858; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
859; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
860; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
861; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
862; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #24]
863; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
864; NONEON-NOSVE-NEXT:    add sp, sp, #32
865; NONEON-NOSVE-NEXT:    ret
866  %res = fdiv <2 x float> %op1, %op2
867  ret <2 x float> %res
868}
869
; fdiv of two <4 x float> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated SVE fdiv
; under a "ptrue p0.s, vl4" predicate. For the llc invocation that passes no
; -mattr, both operands are pushed onto a 48-byte stack frame and the quotient
; is built from four scalar fdiv instructions, two lanes at a time.
870define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
871; CHECK-LABEL: fdiv_v4f32:
872; CHECK:       // %bb.0:
873; CHECK-NEXT:    ptrue p0.s, vl4
874; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
875; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
876; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
877; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
878; CHECK-NEXT:    ret
879;
880; NONEON-NOSVE-LABEL: fdiv_v4f32:
881; NONEON-NOSVE:       // %bb.0:
882; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
883; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
884; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
885; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
886; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
887; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
888; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
889; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
890; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #40]
891; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
892; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
893; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
894; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
895; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #32]
896; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
897; NONEON-NOSVE-NEXT:    add sp, sp, #48
898; NONEON-NOSVE-NEXT:    ret
899  %res = fdiv <4 x float> %op1, %op2
900  ret <4 x float> %res
901}
902
; fdiv of two <8 x float> vectors loaded from %a and %b; the quotient is
; stored back to %a.
; With SVE or SME enabled, the vector is handled as two q-register halves
; under "ptrue p0.s, vl4": one half is divided with fdivr and the other with
; movprfx + fdiv, then both are stored with a single stp.
; For the llc invocation that passes no -mattr, the operands are copied to a
; 96-byte stack frame and the quotient is built from eight scalar fdiv
; instructions before being stored to %a.
903define void @fdiv_v8f32(ptr %a, ptr %b) {
904; CHECK-LABEL: fdiv_v8f32:
905; CHECK:       // %bb.0:
906; CHECK-NEXT:    ldp q0, q3, [x1]
907; CHECK-NEXT:    ptrue p0.s, vl4
908; CHECK-NEXT:    ldp q1, q2, [x0]
909; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z1.s
910; CHECK-NEXT:    movprfx z1, z2
911; CHECK-NEXT:    fdiv z1.s, p0/m, z1.s, z3.s
912; CHECK-NEXT:    stp q0, q1, [x0]
913; CHECK-NEXT:    ret
914;
915; NONEON-NOSVE-LABEL: fdiv_v8f32:
916; NONEON-NOSVE:       // %bb.0:
917; NONEON-NOSVE-NEXT:    sub sp, sp, #96
918; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
919; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
920; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
921; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
922; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
923; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #40]
924; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
925; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
926; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
927; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
928; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #32]
929; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #88]
930; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
931; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
932; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
933; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
934; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
935; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #80]
936; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
937; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
938; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
939; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
940; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
941; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #72]
942; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
943; NONEON-NOSVE-NEXT:    fdiv s3, s2, s0
944; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
945; NONEON-NOSVE-NEXT:    fdiv s0, s1, s0
946; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #64]
947; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
948; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
949; NONEON-NOSVE-NEXT:    add sp, sp, #96
950; NONEON-NOSVE-NEXT:    ret
951  %op1 = load <8 x float>, ptr %a
952  %op2 = load <8 x float>, ptr %b
953  %res = fdiv <8 x float> %op1, %op2
954  store <8 x float> %res, ptr %a
955  ret void
956}
957
; fdiv of two <2 x double> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated SVE fdiv
; under a "ptrue p0.d, vl2" predicate. For the llc invocation that passes no
; -mattr, both operands are pushed onto a 48-byte stack frame and divided with
; two scalar double-precision fdiv instructions.
958define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
959; CHECK-LABEL: fdiv_v2f64:
960; CHECK:       // %bb.0:
961; CHECK-NEXT:    ptrue p0.d, vl2
962; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
963; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
964; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
965; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
966; CHECK-NEXT:    ret
967;
968; NONEON-NOSVE-LABEL: fdiv_v2f64:
969; NONEON-NOSVE:       // %bb.0:
970; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
971; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
972; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
973; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
974; NONEON-NOSVE-NEXT:    fdiv d3, d2, d0
975; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
976; NONEON-NOSVE-NEXT:    fdiv d0, d1, d0
977; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #32]
978; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
979; NONEON-NOSVE-NEXT:    add sp, sp, #48
980; NONEON-NOSVE-NEXT:    ret
981  %res = fdiv <2 x double> %op1, %op2
982  ret <2 x double> %res
983}
984
; fdiv of two <4 x double> vectors loaded from %a and %b; the quotient is
; stored back to %a.
; With SVE or SME enabled, the vector is handled as two q-register halves
; under "ptrue p0.d, vl2": one half is divided with fdivr and the other with
; movprfx + fdiv, then both are stored with a single stp.
; For the llc invocation that passes no -mattr, the operands are copied to a
; 96-byte stack frame and the quotient is built from four scalar
; double-precision fdiv instructions before being stored to %a.
985define void @fdiv_v4f64(ptr %a, ptr %b) {
986; CHECK-LABEL: fdiv_v4f64:
987; CHECK:       // %bb.0:
988; CHECK-NEXT:    ldp q0, q3, [x1]
989; CHECK-NEXT:    ptrue p0.d, vl2
990; CHECK-NEXT:    ldp q1, q2, [x0]
991; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
992; CHECK-NEXT:    movprfx z1, z2
993; CHECK-NEXT:    fdiv z1.d, p0/m, z1.d, z3.d
994; CHECK-NEXT:    stp q0, q1, [x0]
995; CHECK-NEXT:    ret
996;
997; NONEON-NOSVE-LABEL: fdiv_v4f64:
998; NONEON-NOSVE:       // %bb.0:
999; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1000; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1001; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1002; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1003; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1004; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1005; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #32]
1006; NONEON-NOSVE-NEXT:    ldr d0, [sp, #56]
1007; NONEON-NOSVE-NEXT:    fdiv d3, d2, d0
1008; NONEON-NOSVE-NEXT:    ldr d0, [sp, #48]
1009; NONEON-NOSVE-NEXT:    fdiv d0, d1, d0
1010; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
1011; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #80]
1012; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1013; NONEON-NOSVE-NEXT:    fdiv d3, d2, d0
1014; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
1015; NONEON-NOSVE-NEXT:    fdiv d0, d1, d0
1016; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #64]
1017; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1018; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1019; NONEON-NOSVE-NEXT:    add sp, sp, #96
1020; NONEON-NOSVE-NEXT:    ret
1021  %op1 = load <4 x double>, ptr %a
1022  %op2 = load <4 x double>, ptr %b
1023  %res = fdiv <4 x double> %op1, %op2
1024  store <4 x double> %res, ptr %a
1025  ret void
1026}
1027
1028;
1029; FMA
1030;
1031
; llvm.fma on three <2 x half> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated fmad on
; halfword lanes; the predicate is "ptrue p0.h, vl4", so four lanes are active
; (presumably because <2 x half> is widened to <4 x half> during legalization;
; that step is not visible in this test).
; For the llc invocation that passes no -mattr, the three operands are spilled
; to a 32-byte stack frame and four lanes are computed one at a time: each
; half is converted to single precision with fcvt, combined with a scalar
; fmadd, and converted back to half before being stored to the result slot.
1032define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) {
1033; CHECK-LABEL: fma_v2f16:
1034; CHECK:       // %bb.0:
1035; CHECK-NEXT:    ptrue p0.h, vl4
1036; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1037; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
1038; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1039; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
1040; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1041; CHECK-NEXT:    ret
1042;
1043; NONEON-NOSVE-LABEL: fma_v2f16:
1044; NONEON-NOSVE:       // %bb.0:
1045; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1046; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1047; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #8]
1048; NONEON-NOSVE-NEXT:    str d0, [sp]
1049; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1050; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1051; NONEON-NOSVE-NEXT:    ldr h2, [sp, #6]
1052; NONEON-NOSVE-NEXT:    fcvt s0, h0
1053; NONEON-NOSVE-NEXT:    fcvt s1, h1
1054; NONEON-NOSVE-NEXT:    fcvt s2, h2
1055; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1056; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1057; NONEON-NOSVE-NEXT:    ldr h2, [sp, #4]
1058; NONEON-NOSVE-NEXT:    fcvt s1, h1
1059; NONEON-NOSVE-NEXT:    fcvt s2, h2
1060; NONEON-NOSVE-NEXT:    fcvt h0, s0
1061; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
1062; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1063; NONEON-NOSVE-NEXT:    fcvt s0, h0
1064; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1065; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1066; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
1067; NONEON-NOSVE-NEXT:    fcvt s1, h1
1068; NONEON-NOSVE-NEXT:    fcvt s2, h2
1069; NONEON-NOSVE-NEXT:    fcvt h0, s0
1070; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
1071; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1072; NONEON-NOSVE-NEXT:    fcvt s0, h0
1073; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1074; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1075; NONEON-NOSVE-NEXT:    ldr h2, [sp]
1076; NONEON-NOSVE-NEXT:    fcvt s1, h1
1077; NONEON-NOSVE-NEXT:    fcvt s2, h2
1078; NONEON-NOSVE-NEXT:    fcvt h0, s0
1079; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
1080; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1081; NONEON-NOSVE-NEXT:    fcvt s0, h0
1082; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1083; NONEON-NOSVE-NEXT:    fcvt h0, s0
1084; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
1085; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1086; NONEON-NOSVE-NEXT:    add sp, sp, #32
1087; NONEON-NOSVE-NEXT:    ret
1088  %res = call <2 x half> @llvm.fma.v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3)
1089  ret <2 x half> %res
1090}
1091
; llvm.fma on three <4 x half> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated fmad on
; halfword lanes under a "ptrue p0.h, vl4" predicate.
; For the llc invocation that passes no -mattr, the three operands are spilled
; to a 32-byte stack frame and the four lanes are computed one at a time: each
; half is converted to single precision with fcvt, combined with a scalar
; fmadd, and converted back to half before being stored to the result slot.
1092define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) {
1093; CHECK-LABEL: fma_v4f16:
1094; CHECK:       // %bb.0:
1095; CHECK-NEXT:    ptrue p0.h, vl4
1096; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1097; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
1098; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1099; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
1100; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1101; CHECK-NEXT:    ret
1102;
1103; NONEON-NOSVE-LABEL: fma_v4f16:
1104; NONEON-NOSVE:       // %bb.0:
1105; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1106; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1107; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #8]
1108; NONEON-NOSVE-NEXT:    str d0, [sp]
1109; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1110; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1111; NONEON-NOSVE-NEXT:    ldr h2, [sp, #6]
1112; NONEON-NOSVE-NEXT:    fcvt s0, h0
1113; NONEON-NOSVE-NEXT:    fcvt s1, h1
1114; NONEON-NOSVE-NEXT:    fcvt s2, h2
1115; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1116; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1117; NONEON-NOSVE-NEXT:    ldr h2, [sp, #4]
1118; NONEON-NOSVE-NEXT:    fcvt s1, h1
1119; NONEON-NOSVE-NEXT:    fcvt s2, h2
1120; NONEON-NOSVE-NEXT:    fcvt h0, s0
1121; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
1122; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1123; NONEON-NOSVE-NEXT:    fcvt s0, h0
1124; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1125; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1126; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
1127; NONEON-NOSVE-NEXT:    fcvt s1, h1
1128; NONEON-NOSVE-NEXT:    fcvt s2, h2
1129; NONEON-NOSVE-NEXT:    fcvt h0, s0
1130; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
1131; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1132; NONEON-NOSVE-NEXT:    fcvt s0, h0
1133; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1134; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1135; NONEON-NOSVE-NEXT:    ldr h2, [sp]
1136; NONEON-NOSVE-NEXT:    fcvt s1, h1
1137; NONEON-NOSVE-NEXT:    fcvt s2, h2
1138; NONEON-NOSVE-NEXT:    fcvt h0, s0
1139; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
1140; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1141; NONEON-NOSVE-NEXT:    fcvt s0, h0
1142; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1143; NONEON-NOSVE-NEXT:    fcvt h0, s0
1144; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
1145; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1146; NONEON-NOSVE-NEXT:    add sp, sp, #32
1147; NONEON-NOSVE-NEXT:    ret
1148  %res = call <4 x half> @llvm.fma.v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3)
1149  ret <4 x half> %res
1150}
1151
; llvm.fma on three <8 x half> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated fmad on
; halfword lanes under a "ptrue p0.h, vl8" predicate.
; For the llc invocation that passes no -mattr, the three operands are spilled
; to a 64-byte stack frame and the eight lanes are computed one at a time:
; each half is converted to single precision with fcvt, combined with a
; scalar fmadd, and converted back to half before being stored to the result
; slot, which is finally reloaded as q0.
1152define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) {
1153; CHECK-LABEL: fma_v8f16:
1154; CHECK:       // %bb.0:
1155; CHECK-NEXT:    ptrue p0.h, vl8
1156; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1157; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
1158; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1159; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
1160; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1161; CHECK-NEXT:    ret
1162;
1163; NONEON-NOSVE-LABEL: fma_v8f16:
1164; NONEON-NOSVE:       // %bb.0:
1165; NONEON-NOSVE-NEXT:    sub sp, sp, #64
1166; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
1167; NONEON-NOSVE-NEXT:    stp q1, q2, [sp, #16]
1168; NONEON-NOSVE-NEXT:    str q0, [sp]
1169; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
1170; NONEON-NOSVE-NEXT:    ldr h1, [sp, #30]
1171; NONEON-NOSVE-NEXT:    ldr h2, [sp, #14]
1172; NONEON-NOSVE-NEXT:    fcvt s0, h0
1173; NONEON-NOSVE-NEXT:    fcvt s1, h1
1174; NONEON-NOSVE-NEXT:    fcvt s2, h2
1175; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1176; NONEON-NOSVE-NEXT:    ldr h1, [sp, #28]
1177; NONEON-NOSVE-NEXT:    ldr h2, [sp, #12]
1178; NONEON-NOSVE-NEXT:    fcvt s1, h1
1179; NONEON-NOSVE-NEXT:    fcvt s2, h2
1180; NONEON-NOSVE-NEXT:    fcvt h0, s0
1181; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
1182; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
1183; NONEON-NOSVE-NEXT:    fcvt s0, h0
1184; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1185; NONEON-NOSVE-NEXT:    ldr h1, [sp, #26]
1186; NONEON-NOSVE-NEXT:    ldr h2, [sp, #10]
1187; NONEON-NOSVE-NEXT:    fcvt s1, h1
1188; NONEON-NOSVE-NEXT:    fcvt s2, h2
1189; NONEON-NOSVE-NEXT:    fcvt h0, s0
1190; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
1191; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
1192; NONEON-NOSVE-NEXT:    fcvt s0, h0
1193; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1194; NONEON-NOSVE-NEXT:    ldr h1, [sp, #24]
1195; NONEON-NOSVE-NEXT:    ldr h2, [sp, #8]
1196; NONEON-NOSVE-NEXT:    fcvt s1, h1
1197; NONEON-NOSVE-NEXT:    fcvt s2, h2
1198; NONEON-NOSVE-NEXT:    fcvt h0, s0
1199; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
1200; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
1201; NONEON-NOSVE-NEXT:    fcvt s0, h0
1202; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1203; NONEON-NOSVE-NEXT:    ldr h1, [sp, #22]
1204; NONEON-NOSVE-NEXT:    ldr h2, [sp, #6]
1205; NONEON-NOSVE-NEXT:    fcvt s1, h1
1206; NONEON-NOSVE-NEXT:    fcvt s2, h2
1207; NONEON-NOSVE-NEXT:    fcvt h0, s0
1208; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
1209; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
1210; NONEON-NOSVE-NEXT:    fcvt s0, h0
1211; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1212; NONEON-NOSVE-NEXT:    ldr h1, [sp, #20]
1213; NONEON-NOSVE-NEXT:    ldr h2, [sp, #4]
1214; NONEON-NOSVE-NEXT:    fcvt s1, h1
1215; NONEON-NOSVE-NEXT:    fcvt s2, h2
1216; NONEON-NOSVE-NEXT:    fcvt h0, s0
1217; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
1218; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
1219; NONEON-NOSVE-NEXT:    fcvt s0, h0
1220; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1221; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
1222; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
1223; NONEON-NOSVE-NEXT:    fcvt s1, h1
1224; NONEON-NOSVE-NEXT:    fcvt s2, h2
1225; NONEON-NOSVE-NEXT:    fcvt h0, s0
1226; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
1227; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
1228; NONEON-NOSVE-NEXT:    fcvt s0, h0
1229; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1230; NONEON-NOSVE-NEXT:    ldr h1, [sp, #16]
1231; NONEON-NOSVE-NEXT:    ldr h2, [sp]
1232; NONEON-NOSVE-NEXT:    fcvt s1, h1
1233; NONEON-NOSVE-NEXT:    fcvt s2, h2
1234; NONEON-NOSVE-NEXT:    fcvt h0, s0
1235; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
1236; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
1237; NONEON-NOSVE-NEXT:    fcvt s0, h0
1238; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1239; NONEON-NOSVE-NEXT:    fcvt h0, s0
1240; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
1241; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
1242; NONEON-NOSVE-NEXT:    add sp, sp, #64
1243; NONEON-NOSVE-NEXT:    ret
1244  %res = call <8 x half> @llvm.fma.v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3)
1245  ret <8 x half> %res
1246}
1247
; llvm.fma on three <16 x half> vectors loaded from %a, %b and %c; the result
; is stored back to %a.
; With SVE or SME enabled, the vector is handled as two q-register halves
; under "ptrue p0.h, vl8": one half uses fmad and the other movprfx + fmla,
; then both are stored with a single stp.
; For the llc invocation that passes no -mattr, the operands are copied to a
; 128-byte stack frame and the sixteen lanes are computed one at a time: each
; half is converted to single precision with fcvt, combined with a scalar
; fmadd, and converted back to half before being stored to the result area,
; which is finally copied to %a.
1248define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
1249; CHECK-LABEL: fma_v16f16:
1250; CHECK:       // %bb.0:
1251; CHECK-NEXT:    ldp q0, q4, [x1]
1252; CHECK-NEXT:    ptrue p0.h, vl8
1253; CHECK-NEXT:    ldp q1, q5, [x2]
1254; CHECK-NEXT:    ldp q2, q3, [x0]
1255; CHECK-NEXT:    fmad z0.h, p0/m, z2.h, z1.h
1256; CHECK-NEXT:    movprfx z1, z5
1257; CHECK-NEXT:    fmla z1.h, p0/m, z3.h, z4.h
1258; CHECK-NEXT:    stp q0, q1, [x0]
1259; CHECK-NEXT:    ret
1260;
1261; NONEON-NOSVE-LABEL: fma_v16f16:
1262; NONEON-NOSVE:       // %bb.0:
1263; NONEON-NOSVE-NEXT:    sub sp, sp, #128
1264; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
1265; NONEON-NOSVE-NEXT:    ldp q1, q0, [x2]
1266; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
1267; NONEON-NOSVE-NEXT:    ldp q4, q5, [x0]
1268; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #64]
1269; NONEON-NOSVE-NEXT:    stp q4, q2, [sp]
1270; NONEON-NOSVE-NEXT:    ldr h0, [sp, #94]
1271; NONEON-NOSVE-NEXT:    stp q1, q5, [sp, #32]
1272; NONEON-NOSVE-NEXT:    ldr h1, [sp, #78]
1273; NONEON-NOSVE-NEXT:    ldr h2, [sp, #62]
1274; NONEON-NOSVE-NEXT:    fcvt s0, h0
1275; NONEON-NOSVE-NEXT:    fcvt s1, h1
1276; NONEON-NOSVE-NEXT:    fcvt s2, h2
1277; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1278; NONEON-NOSVE-NEXT:    ldr h1, [sp, #76]
1279; NONEON-NOSVE-NEXT:    ldr h2, [sp, #60]
1280; NONEON-NOSVE-NEXT:    fcvt s1, h1
1281; NONEON-NOSVE-NEXT:    fcvt s2, h2
1282; NONEON-NOSVE-NEXT:    fcvt h0, s0
1283; NONEON-NOSVE-NEXT:    str h0, [sp, #126]
1284; NONEON-NOSVE-NEXT:    ldr h0, [sp, #92]
1285; NONEON-NOSVE-NEXT:    fcvt s0, h0
1286; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1287; NONEON-NOSVE-NEXT:    ldr h1, [sp, #74]
1288; NONEON-NOSVE-NEXT:    ldr h2, [sp, #58]
1289; NONEON-NOSVE-NEXT:    fcvt s1, h1
1290; NONEON-NOSVE-NEXT:    fcvt s2, h2
1291; NONEON-NOSVE-NEXT:    fcvt h0, s0
1292; NONEON-NOSVE-NEXT:    str h0, [sp, #124]
1293; NONEON-NOSVE-NEXT:    ldr h0, [sp, #90]
1294; NONEON-NOSVE-NEXT:    fcvt s0, h0
1295; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1296; NONEON-NOSVE-NEXT:    ldr h1, [sp, #72]
1297; NONEON-NOSVE-NEXT:    ldr h2, [sp, #56]
1298; NONEON-NOSVE-NEXT:    fcvt s1, h1
1299; NONEON-NOSVE-NEXT:    fcvt s2, h2
1300; NONEON-NOSVE-NEXT:    fcvt h0, s0
1301; NONEON-NOSVE-NEXT:    str h0, [sp, #122]
1302; NONEON-NOSVE-NEXT:    ldr h0, [sp, #88]
1303; NONEON-NOSVE-NEXT:    fcvt s0, h0
1304; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1305; NONEON-NOSVE-NEXT:    ldr h1, [sp, #70]
1306; NONEON-NOSVE-NEXT:    ldr h2, [sp, #54]
1307; NONEON-NOSVE-NEXT:    fcvt s1, h1
1308; NONEON-NOSVE-NEXT:    fcvt s2, h2
1309; NONEON-NOSVE-NEXT:    fcvt h0, s0
1310; NONEON-NOSVE-NEXT:    str h0, [sp, #120]
1311; NONEON-NOSVE-NEXT:    ldr h0, [sp, #86]
1312; NONEON-NOSVE-NEXT:    fcvt s0, h0
1313; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1314; NONEON-NOSVE-NEXT:    ldr h1, [sp, #68]
1315; NONEON-NOSVE-NEXT:    ldr h2, [sp, #52]
1316; NONEON-NOSVE-NEXT:    fcvt s1, h1
1317; NONEON-NOSVE-NEXT:    fcvt s2, h2
1318; NONEON-NOSVE-NEXT:    fcvt h0, s0
1319; NONEON-NOSVE-NEXT:    str h0, [sp, #118]
1320; NONEON-NOSVE-NEXT:    ldr h0, [sp, #84]
1321; NONEON-NOSVE-NEXT:    fcvt s0, h0
1322; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1323; NONEON-NOSVE-NEXT:    ldr h1, [sp, #66]
1324; NONEON-NOSVE-NEXT:    ldr h2, [sp, #50]
1325; NONEON-NOSVE-NEXT:    fcvt s1, h1
1326; NONEON-NOSVE-NEXT:    fcvt s2, h2
1327; NONEON-NOSVE-NEXT:    fcvt h0, s0
1328; NONEON-NOSVE-NEXT:    str h0, [sp, #116]
1329; NONEON-NOSVE-NEXT:    ldr h0, [sp, #82]
1330; NONEON-NOSVE-NEXT:    fcvt s0, h0
1331; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1332; NONEON-NOSVE-NEXT:    ldr h1, [sp, #64]
1333; NONEON-NOSVE-NEXT:    ldr h2, [sp, #48]
1334; NONEON-NOSVE-NEXT:    fcvt s1, h1
1335; NONEON-NOSVE-NEXT:    fcvt s2, h2
1336; NONEON-NOSVE-NEXT:    fcvt h0, s0
1337; NONEON-NOSVE-NEXT:    str h0, [sp, #114]
1338; NONEON-NOSVE-NEXT:    ldr h0, [sp, #80]
1339; NONEON-NOSVE-NEXT:    fcvt s0, h0
1340; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1341; NONEON-NOSVE-NEXT:    ldr h1, [sp, #30]
1342; NONEON-NOSVE-NEXT:    ldr h2, [sp, #14]
1343; NONEON-NOSVE-NEXT:    fcvt s1, h1
1344; NONEON-NOSVE-NEXT:    fcvt s2, h2
1345; NONEON-NOSVE-NEXT:    fcvt h0, s0
1346; NONEON-NOSVE-NEXT:    str h0, [sp, #112]
1347; NONEON-NOSVE-NEXT:    ldr h0, [sp, #46]
1348; NONEON-NOSVE-NEXT:    fcvt s0, h0
1349; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1350; NONEON-NOSVE-NEXT:    ldr h1, [sp, #28]
1351; NONEON-NOSVE-NEXT:    ldr h2, [sp, #12]
1352; NONEON-NOSVE-NEXT:    fcvt s1, h1
1353; NONEON-NOSVE-NEXT:    fcvt s2, h2
1354; NONEON-NOSVE-NEXT:    fcvt h0, s0
1355; NONEON-NOSVE-NEXT:    str h0, [sp, #110]
1356; NONEON-NOSVE-NEXT:    ldr h0, [sp, #44]
1357; NONEON-NOSVE-NEXT:    fcvt s0, h0
1358; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1359; NONEON-NOSVE-NEXT:    ldr h1, [sp, #26]
1360; NONEON-NOSVE-NEXT:    ldr h2, [sp, #10]
1361; NONEON-NOSVE-NEXT:    fcvt s1, h1
1362; NONEON-NOSVE-NEXT:    fcvt s2, h2
1363; NONEON-NOSVE-NEXT:    fcvt h0, s0
1364; NONEON-NOSVE-NEXT:    str h0, [sp, #108]
1365; NONEON-NOSVE-NEXT:    ldr h0, [sp, #42]
1366; NONEON-NOSVE-NEXT:    fcvt s0, h0
1367; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1368; NONEON-NOSVE-NEXT:    ldr h1, [sp, #24]
1369; NONEON-NOSVE-NEXT:    ldr h2, [sp, #8]
1370; NONEON-NOSVE-NEXT:    fcvt s1, h1
1371; NONEON-NOSVE-NEXT:    fcvt s2, h2
1372; NONEON-NOSVE-NEXT:    fcvt h0, s0
1373; NONEON-NOSVE-NEXT:    str h0, [sp, #106]
1374; NONEON-NOSVE-NEXT:    ldr h0, [sp, #40]
1375; NONEON-NOSVE-NEXT:    fcvt s0, h0
1376; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1377; NONEON-NOSVE-NEXT:    ldr h1, [sp, #22]
1378; NONEON-NOSVE-NEXT:    ldr h2, [sp, #6]
1379; NONEON-NOSVE-NEXT:    fcvt s1, h1
1380; NONEON-NOSVE-NEXT:    fcvt s2, h2
1381; NONEON-NOSVE-NEXT:    fcvt h0, s0
1382; NONEON-NOSVE-NEXT:    str h0, [sp, #104]
1383; NONEON-NOSVE-NEXT:    ldr h0, [sp, #38]
1384; NONEON-NOSVE-NEXT:    fcvt s0, h0
1385; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1386; NONEON-NOSVE-NEXT:    ldr h1, [sp, #20]
1387; NONEON-NOSVE-NEXT:    ldr h2, [sp, #4]
1388; NONEON-NOSVE-NEXT:    fcvt s1, h1
1389; NONEON-NOSVE-NEXT:    fcvt s2, h2
1390; NONEON-NOSVE-NEXT:    fcvt h0, s0
1391; NONEON-NOSVE-NEXT:    str h0, [sp, #102]
1392; NONEON-NOSVE-NEXT:    ldr h0, [sp, #36]
1393; NONEON-NOSVE-NEXT:    fcvt s0, h0
1394; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1395; NONEON-NOSVE-NEXT:    ldr h1, [sp, #18]
1396; NONEON-NOSVE-NEXT:    ldr h2, [sp, #2]
1397; NONEON-NOSVE-NEXT:    fcvt s1, h1
1398; NONEON-NOSVE-NEXT:    fcvt s2, h2
1399; NONEON-NOSVE-NEXT:    fcvt h0, s0
1400; NONEON-NOSVE-NEXT:    str h0, [sp, #100]
1401; NONEON-NOSVE-NEXT:    ldr h0, [sp, #34]
1402; NONEON-NOSVE-NEXT:    fcvt s0, h0
1403; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1404; NONEON-NOSVE-NEXT:    ldr h1, [sp, #16]
1405; NONEON-NOSVE-NEXT:    ldr h2, [sp]
1406; NONEON-NOSVE-NEXT:    fcvt s1, h1
1407; NONEON-NOSVE-NEXT:    fcvt s2, h2
1408; NONEON-NOSVE-NEXT:    fcvt h0, s0
1409; NONEON-NOSVE-NEXT:    str h0, [sp, #98]
1410; NONEON-NOSVE-NEXT:    ldr h0, [sp, #32]
1411; NONEON-NOSVE-NEXT:    fcvt s0, h0
1412; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1413; NONEON-NOSVE-NEXT:    fcvt h0, s0
1414; NONEON-NOSVE-NEXT:    str h0, [sp, #96]
1415; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
1416; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1417; NONEON-NOSVE-NEXT:    add sp, sp, #128
1418; NONEON-NOSVE-NEXT:    ret
1419  %op1 = load <16 x half>, ptr %a
1420  %op2 = load <16 x half>, ptr %b
1421  %op3 = load <16 x half>, ptr %c
1422  %res = call <16 x half> @llvm.fma.v16f16(<16 x half> %op1, <16 x half> %op2, <16 x half> %op3)
1423  store <16 x half> %res, ptr %a
1424  ret void
1425}
1426
; llvm.fma on three <2 x float> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated fmad on
; word lanes under a "ptrue p0.s, vl2" predicate.
; For the llc invocation that passes no -mattr, the three operands are spilled
; to a 32-byte stack frame and the result is built from two scalar fmadd
; instructions, then reloaded as d0.
1427define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) {
1428; CHECK-LABEL: fma_v2f32:
1429; CHECK:       // %bb.0:
1430; CHECK-NEXT:    ptrue p0.s, vl2
1431; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1432; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
1433; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1434; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
1435; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1436; CHECK-NEXT:    ret
1437;
1438; NONEON-NOSVE-LABEL: fma_v2f32:
1439; NONEON-NOSVE:       // %bb.0:
1440; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1441; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1442; NONEON-NOSVE-NEXT:    stp d1, d2, [sp, #8]
1443; NONEON-NOSVE-NEXT:    str d0, [sp]
1444; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #8]
1445; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp]
1446; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
1447; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1448; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
1449; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1450; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #24]
1451; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1452; NONEON-NOSVE-NEXT:    add sp, sp, #32
1453; NONEON-NOSVE-NEXT:    ret
1454  %res = call <2 x float> @llvm.fma.v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3)
1455  ret <2 x float> %res
1456}
1457
; llvm.fma on three <4 x float> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated fmad on
; word lanes under a "ptrue p0.s, vl4" predicate.
; For the llc invocation that passes no -mattr, the three operands are spilled
; to a 64-byte stack frame and the result is built from four scalar fmadd
; instructions, two lanes at a time, then reloaded as q0.
1458define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) {
1459; CHECK-LABEL: fma_v4f32:
1460; CHECK:       // %bb.0:
1461; CHECK-NEXT:    ptrue p0.s, vl4
1462; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1463; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
1464; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1465; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
1466; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1467; CHECK-NEXT:    ret
1468;
1469; NONEON-NOSVE-LABEL: fma_v4f32:
1470; NONEON-NOSVE:       // %bb.0:
1471; NONEON-NOSVE-NEXT:    sub sp, sp, #64
1472; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
1473; NONEON-NOSVE-NEXT:    stp q1, q2, [sp, #16]
1474; NONEON-NOSVE-NEXT:    str q0, [sp]
1475; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #24]
1476; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
1477; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
1478; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1479; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
1480; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1481; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #16]
1482; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp]
1483; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #56]
1484; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
1485; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1486; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
1487; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1488; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #48]
1489; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
1490; NONEON-NOSVE-NEXT:    add sp, sp, #64
1491; NONEON-NOSVE-NEXT:    ret
1492  %res = call <4 x float> @llvm.fma.v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3)
1493  ret <4 x float> %res
1494}
1495
; llvm.fma on three <8 x float> vectors loaded from %a, %b and %c; the result
; is stored back to %a.
; With SVE or SME enabled, the vector is handled as two q-register halves
; under "ptrue p0.s, vl4": one half uses fmad and the other movprfx + fmla,
; then both are stored with a single stp.
; For the llc invocation that passes no -mattr, the operands are copied to a
; 128-byte stack frame and the result is built from eight scalar fmadd
; instructions, two lanes at a time, before being copied to %a.
1496define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
1497; CHECK-LABEL: fma_v8f32:
1498; CHECK:       // %bb.0:
1499; CHECK-NEXT:    ldp q0, q4, [x1]
1500; CHECK-NEXT:    ptrue p0.s, vl4
1501; CHECK-NEXT:    ldp q1, q5, [x2]
1502; CHECK-NEXT:    ldp q2, q3, [x0]
1503; CHECK-NEXT:    fmad z0.s, p0/m, z2.s, z1.s
1504; CHECK-NEXT:    movprfx z1, z5
1505; CHECK-NEXT:    fmla z1.s, p0/m, z3.s, z4.s
1506; CHECK-NEXT:    stp q0, q1, [x0]
1507; CHECK-NEXT:    ret
1508;
1509; NONEON-NOSVE-LABEL: fma_v8f32:
1510; NONEON-NOSVE:       // %bb.0:
1511; NONEON-NOSVE-NEXT:    sub sp, sp, #128
1512; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
1513; NONEON-NOSVE-NEXT:    ldp q1, q0, [x2]
1514; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
1515; NONEON-NOSVE-NEXT:    ldp q4, q5, [x0]
1516; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #64]
1517; NONEON-NOSVE-NEXT:    stp q4, q2, [sp]
1518; NONEON-NOSVE-NEXT:    ldr s0, [sp, #92]
1519; NONEON-NOSVE-NEXT:    stp q1, q5, [sp, #32]
1520; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #72]
1521; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #56]
1522; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1523; NONEON-NOSVE-NEXT:    ldr s0, [sp, #88]
1524; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1525; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #64]
1526; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #48]
1527; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #120]
1528; NONEON-NOSVE-NEXT:    ldr s0, [sp, #84]
1529; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1530; NONEON-NOSVE-NEXT:    ldr s0, [sp, #80]
1531; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1532; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #24]
1533; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp, #8]
1534; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #112]
1535; NONEON-NOSVE-NEXT:    ldr s0, [sp, #44]
1536; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1537; NONEON-NOSVE-NEXT:    ldr s0, [sp, #40]
1538; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1539; NONEON-NOSVE-NEXT:    ldp s1, s3, [sp, #16]
1540; NONEON-NOSVE-NEXT:    ldp s2, s4, [sp]
1541; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #104]
1542; NONEON-NOSVE-NEXT:    ldr s0, [sp, #36]
1543; NONEON-NOSVE-NEXT:    fmadd s5, s4, s3, s0
1544; NONEON-NOSVE-NEXT:    ldr s0, [sp, #32]
1545; NONEON-NOSVE-NEXT:    fmadd s0, s2, s1, s0
1546; NONEON-NOSVE-NEXT:    stp s0, s5, [sp, #96]
1547; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
1548; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1549; NONEON-NOSVE-NEXT:    add sp, sp, #128
1550; NONEON-NOSVE-NEXT:    ret
1551  %op1 = load <8 x float>, ptr %a
1552  %op2 = load <8 x float>, ptr %b
1553  %op3 = load <8 x float>, ptr %c
1554  %res = call <8 x float> @llvm.fma.v8f32(<8 x float> %op1, <8 x float> %op2, <8 x float> %op3)
1555  store <8 x float> %res, ptr %a
1556  ret void
1557}
1558
; llvm.fma on three <2 x double> values passed and returned by value.
; With SVE or SME enabled, the expected code is a single predicated fmad on
; doubleword lanes under a "ptrue p0.d, vl2" predicate.
; For the llc invocation that passes no -mattr, the three operands are spilled
; to a 64-byte stack frame and the result is built from two scalar
; double-precision fmadd instructions, then reloaded as q0.
1559define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) {
1560; CHECK-LABEL: fma_v2f64:
1561; CHECK:       // %bb.0:
1562; CHECK-NEXT:    ptrue p0.d, vl2
1563; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1564; CHECK-NEXT:    // kill: def $q2 killed $q2 def $z2
1565; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1566; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
1567; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1568; CHECK-NEXT:    ret
1569;
1570; NONEON-NOSVE-LABEL: fma_v2f64:
1571; NONEON-NOSVE:       // %bb.0:
1572; NONEON-NOSVE-NEXT:    sub sp, sp, #64
1573; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
1574; NONEON-NOSVE-NEXT:    stp q1, q2, [sp, #16]
1575; NONEON-NOSVE-NEXT:    str q0, [sp]
1576; NONEON-NOSVE-NEXT:    ldp d1, d3, [sp, #16]
1577; NONEON-NOSVE-NEXT:    ldp d2, d4, [sp]
1578; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
1579; NONEON-NOSVE-NEXT:    fmadd d5, d4, d3, d0
1580; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
1581; NONEON-NOSVE-NEXT:    fmadd d0, d2, d1, d0
1582; NONEON-NOSVE-NEXT:    stp d0, d5, [sp, #48]
1583; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
1584; NONEON-NOSVE-NEXT:    add sp, sp, #64
1585; NONEON-NOSVE-NEXT:    ret
1586  %res = call <2 x double> @llvm.fma.v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3)
1587  ret <2 x double> %res
1588}
1589
1590define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
; Fused multiply-add of <4 x double> values loaded from %a, %b and %c, with the
; result stored back to %a. With SVE or SME the 256-bit vectors are handled as
; two 128-bit halves (fmad plus movprfx/fmla). In the NONEON-NOSVE run all four
; lanes go through scalar fmadd on stack copies of the operands.
1591; CHECK-LABEL: fma_v4f64:
1592; CHECK:       // %bb.0:
1593; CHECK-NEXT:    ldp q0, q4, [x1]
1594; CHECK-NEXT:    ptrue p0.d, vl2
1595; CHECK-NEXT:    ldp q1, q5, [x2]
1596; CHECK-NEXT:    ldp q2, q3, [x0]
1597; CHECK-NEXT:    fmad z0.d, p0/m, z2.d, z1.d
1598; CHECK-NEXT:    movprfx z1, z5
1599; CHECK-NEXT:    fmla z1.d, p0/m, z3.d, z4.d
1600; CHECK-NEXT:    stp q0, q1, [x0]
1601; CHECK-NEXT:    ret
1602;
1603; NONEON-NOSVE-LABEL: fma_v4f64:
1604; NONEON-NOSVE:       // %bb.0:
1605; NONEON-NOSVE-NEXT:    sub sp, sp, #128
1606; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
1607; NONEON-NOSVE-NEXT:    ldp q1, q0, [x2]
1608; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
1609; NONEON-NOSVE-NEXT:    ldp q4, q5, [x0]
1610; NONEON-NOSVE-NEXT:    stp q3, q0, [sp, #64]
1611; NONEON-NOSVE-NEXT:    stp q4, q2, [sp]
1612; NONEON-NOSVE-NEXT:    ldr d0, [sp, #88]
1613; NONEON-NOSVE-NEXT:    stp q1, q5, [sp, #32]
1614; NONEON-NOSVE-NEXT:    ldp d1, d3, [sp, #64]
1615; NONEON-NOSVE-NEXT:    ldp d2, d4, [sp, #48]
1616; NONEON-NOSVE-NEXT:    fmadd d5, d4, d3, d0
1617; NONEON-NOSVE-NEXT:    ldr d0, [sp, #80]
1618; NONEON-NOSVE-NEXT:    fmadd d0, d2, d1, d0
1619; NONEON-NOSVE-NEXT:    ldp d1, d3, [sp, #16]
1620; NONEON-NOSVE-NEXT:    ldp d2, d4, [sp]
1621; NONEON-NOSVE-NEXT:    stp d0, d5, [sp, #112]
1622; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
1623; NONEON-NOSVE-NEXT:    fmadd d5, d4, d3, d0
1624; NONEON-NOSVE-NEXT:    ldr d0, [sp, #32]
1625; NONEON-NOSVE-NEXT:    fmadd d0, d2, d1, d0
1626; NONEON-NOSVE-NEXT:    stp d0, d5, [sp, #96]
1627; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
1628; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1629; NONEON-NOSVE-NEXT:    add sp, sp, #128
1630; NONEON-NOSVE-NEXT:    ret
1631  %op1 = load <4 x double>, ptr %a
1632  %op2 = load <4 x double>, ptr %b
1633  %op3 = load <4 x double>, ptr %c
1634  %res = call <4 x double> @llvm.fma.v4f64(<4 x double> %op1, <4 x double> %op2, <4 x double> %op3)
1635  store <4 x double> %res, ptr %a
1636  ret void
1637}
1638
1639;
1640; FMUL
1641;
1642
1643define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
; Element-wise fmul of <2 x half>. With SVE or SME the expected code is one
; predicated fmul using a four-lane half predicate (ptrue p0.h, vl4). In the
; NONEON-NOSVE run four half lanes are each converted to single precision,
; multiplied, and converted back (fcvt, fmul, fcvt).
1644; CHECK-LABEL: fmul_v2f16:
1645; CHECK:       // %bb.0:
1646; CHECK-NEXT:    ptrue p0.h, vl4
1647; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1648; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1649; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
1650; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1651; CHECK-NEXT:    ret
1652;
1653; NONEON-NOSVE-LABEL: fmul_v2f16:
1654; NONEON-NOSVE:       // %bb.0:
1655; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1656; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1657; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1658; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1659; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1660; NONEON-NOSVE-NEXT:    fcvt s0, h0
1661; NONEON-NOSVE-NEXT:    fcvt s1, h1
1662; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1663; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1664; NONEON-NOSVE-NEXT:    fcvt s1, h1
1665; NONEON-NOSVE-NEXT:    fcvt h0, s0
1666; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
1667; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1668; NONEON-NOSVE-NEXT:    fcvt s0, h0
1669; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1670; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1671; NONEON-NOSVE-NEXT:    fcvt s1, h1
1672; NONEON-NOSVE-NEXT:    fcvt h0, s0
1673; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
1674; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1675; NONEON-NOSVE-NEXT:    fcvt s0, h0
1676; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1677; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1678; NONEON-NOSVE-NEXT:    fcvt s1, h1
1679; NONEON-NOSVE-NEXT:    fcvt h0, s0
1680; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
1681; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1682; NONEON-NOSVE-NEXT:    fcvt s0, h0
1683; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1684; NONEON-NOSVE-NEXT:    fcvt h0, s0
1685; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
1686; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1687; NONEON-NOSVE-NEXT:    add sp, sp, #32
1688; NONEON-NOSVE-NEXT:    ret
1689  %res = fmul <2 x half> %op1, %op2
1690  ret <2 x half> %res
1691}
1692
1693define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
; Element-wise fmul of <4 x half>. With SVE or SME the expected code is one
; predicated fmul on four half lanes (ptrue p0.h, vl4). In the NONEON-NOSVE
; run each lane is widened to single precision, multiplied, and narrowed back.
1694; CHECK-LABEL: fmul_v4f16:
1695; CHECK:       // %bb.0:
1696; CHECK-NEXT:    ptrue p0.h, vl4
1697; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1698; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1699; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
1700; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1701; CHECK-NEXT:    ret
1702;
1703; NONEON-NOSVE-LABEL: fmul_v4f16:
1704; NONEON-NOSVE:       // %bb.0:
1705; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1706; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1707; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1708; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1709; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1710; NONEON-NOSVE-NEXT:    fcvt s0, h0
1711; NONEON-NOSVE-NEXT:    fcvt s1, h1
1712; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1713; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1714; NONEON-NOSVE-NEXT:    fcvt s1, h1
1715; NONEON-NOSVE-NEXT:    fcvt h0, s0
1716; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
1717; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1718; NONEON-NOSVE-NEXT:    fcvt s0, h0
1719; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1720; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1721; NONEON-NOSVE-NEXT:    fcvt s1, h1
1722; NONEON-NOSVE-NEXT:    fcvt h0, s0
1723; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
1724; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1725; NONEON-NOSVE-NEXT:    fcvt s0, h0
1726; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1727; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1728; NONEON-NOSVE-NEXT:    fcvt s1, h1
1729; NONEON-NOSVE-NEXT:    fcvt h0, s0
1730; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
1731; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1732; NONEON-NOSVE-NEXT:    fcvt s0, h0
1733; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1734; NONEON-NOSVE-NEXT:    fcvt h0, s0
1735; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
1736; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1737; NONEON-NOSVE-NEXT:    add sp, sp, #32
1738; NONEON-NOSVE-NEXT:    ret
1739  %res = fmul <4 x half> %op1, %op2
1740  ret <4 x half> %res
1741}
1742
1743define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
; Element-wise fmul of <8 x half>. With SVE or SME the expected code is one
; predicated fmul on eight half lanes (ptrue p0.h, vl8). In the NONEON-NOSVE
; run both operands are pushed to the stack and each of the eight lanes is
; widened to single precision, multiplied, and narrowed back.
1744; CHECK-LABEL: fmul_v8f16:
1745; CHECK:       // %bb.0:
1746; CHECK-NEXT:    ptrue p0.h, vl8
1747; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1748; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1749; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
1750; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1751; CHECK-NEXT:    ret
1752;
1753; NONEON-NOSVE-LABEL: fmul_v8f16:
1754; NONEON-NOSVE:       // %bb.0:
1755; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
1756; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
1757; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
1758; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1759; NONEON-NOSVE-NEXT:    fcvt s0, h0
1760; NONEON-NOSVE-NEXT:    fcvt s1, h1
1761; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1762; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1763; NONEON-NOSVE-NEXT:    fcvt s1, h1
1764; NONEON-NOSVE-NEXT:    fcvt h0, s0
1765; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
1766; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
1767; NONEON-NOSVE-NEXT:    fcvt s0, h0
1768; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1769; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1770; NONEON-NOSVE-NEXT:    fcvt s1, h1
1771; NONEON-NOSVE-NEXT:    fcvt h0, s0
1772; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
1773; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
1774; NONEON-NOSVE-NEXT:    fcvt s0, h0
1775; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1776; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1777; NONEON-NOSVE-NEXT:    fcvt s1, h1
1778; NONEON-NOSVE-NEXT:    fcvt h0, s0
1779; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
1780; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
1781; NONEON-NOSVE-NEXT:    fcvt s0, h0
1782; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1783; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
1784; NONEON-NOSVE-NEXT:    fcvt s1, h1
1785; NONEON-NOSVE-NEXT:    fcvt h0, s0
1786; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
1787; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1788; NONEON-NOSVE-NEXT:    fcvt s0, h0
1789; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1790; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
1791; NONEON-NOSVE-NEXT:    fcvt s1, h1
1792; NONEON-NOSVE-NEXT:    fcvt h0, s0
1793; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
1794; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1795; NONEON-NOSVE-NEXT:    fcvt s0, h0
1796; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1797; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1798; NONEON-NOSVE-NEXT:    fcvt s1, h1
1799; NONEON-NOSVE-NEXT:    fcvt h0, s0
1800; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
1801; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1802; NONEON-NOSVE-NEXT:    fcvt s0, h0
1803; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1804; NONEON-NOSVE-NEXT:    ldr h1, [sp]
1805; NONEON-NOSVE-NEXT:    fcvt s1, h1
1806; NONEON-NOSVE-NEXT:    fcvt h0, s0
1807; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
1808; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1809; NONEON-NOSVE-NEXT:    fcvt s0, h0
1810; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1811; NONEON-NOSVE-NEXT:    fcvt h0, s0
1812; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
1813; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
1814; NONEON-NOSVE-NEXT:    add sp, sp, #48
1815; NONEON-NOSVE-NEXT:    ret
1816  %res = fmul <8 x half> %op1, %op2
1817  ret <8 x half> %res
1818}
1819
1820define void @fmul_v16f16(ptr %a, ptr %b) {
; Element-wise fmul of <16 x half> values loaded from %a and %b, with the
; result stored back to %a. With SVE or SME the 256-bit vectors are handled as
; two 128-bit halves, each with a predicated fmul (ptrue p0.h, vl8). In the
; NONEON-NOSVE run all sixteen lanes are processed one at a time on stack
; copies, widening to single precision for the multiply.
1821; CHECK-LABEL: fmul_v16f16:
1822; CHECK:       // %bb.0:
1823; CHECK-NEXT:    ldp q0, q3, [x1]
1824; CHECK-NEXT:    ptrue p0.h, vl8
1825; CHECK-NEXT:    ldp q1, q2, [x0]
1826; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
1827; CHECK-NEXT:    movprfx z1, z2
1828; CHECK-NEXT:    fmul z1.h, p0/m, z1.h, z3.h
1829; CHECK-NEXT:    stp q0, q1, [x0]
1830; CHECK-NEXT:    ret
1831;
1832; NONEON-NOSVE-LABEL: fmul_v16f16:
1833; NONEON-NOSVE:       // %bb.0:
1834; NONEON-NOSVE-NEXT:    sub sp, sp, #96
1835; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
1836; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
1837; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
1838; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
1839; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
1840; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
1841; NONEON-NOSVE-NEXT:    ldr h1, [sp, #46]
1842; NONEON-NOSVE-NEXT:    fcvt s0, h0
1843; NONEON-NOSVE-NEXT:    fcvt s1, h1
1844; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1845; NONEON-NOSVE-NEXT:    ldr h1, [sp, #44]
1846; NONEON-NOSVE-NEXT:    fcvt s1, h1
1847; NONEON-NOSVE-NEXT:    fcvt h0, s0
1848; NONEON-NOSVE-NEXT:    str h0, [sp, #94]
1849; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
1850; NONEON-NOSVE-NEXT:    fcvt s0, h0
1851; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1852; NONEON-NOSVE-NEXT:    ldr h1, [sp, #42]
1853; NONEON-NOSVE-NEXT:    fcvt s1, h1
1854; NONEON-NOSVE-NEXT:    fcvt h0, s0
1855; NONEON-NOSVE-NEXT:    str h0, [sp, #92]
1856; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
1857; NONEON-NOSVE-NEXT:    fcvt s0, h0
1858; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1859; NONEON-NOSVE-NEXT:    ldr h1, [sp, #40]
1860; NONEON-NOSVE-NEXT:    fcvt s1, h1
1861; NONEON-NOSVE-NEXT:    fcvt h0, s0
1862; NONEON-NOSVE-NEXT:    str h0, [sp, #90]
1863; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
1864; NONEON-NOSVE-NEXT:    fcvt s0, h0
1865; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1866; NONEON-NOSVE-NEXT:    ldr h1, [sp, #38]
1867; NONEON-NOSVE-NEXT:    fcvt s1, h1
1868; NONEON-NOSVE-NEXT:    fcvt h0, s0
1869; NONEON-NOSVE-NEXT:    str h0, [sp, #88]
1870; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
1871; NONEON-NOSVE-NEXT:    fcvt s0, h0
1872; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1873; NONEON-NOSVE-NEXT:    ldr h1, [sp, #36]
1874; NONEON-NOSVE-NEXT:    fcvt s1, h1
1875; NONEON-NOSVE-NEXT:    fcvt h0, s0
1876; NONEON-NOSVE-NEXT:    str h0, [sp, #86]
1877; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
1878; NONEON-NOSVE-NEXT:    fcvt s0, h0
1879; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1880; NONEON-NOSVE-NEXT:    ldr h1, [sp, #34]
1881; NONEON-NOSVE-NEXT:    fcvt s1, h1
1882; NONEON-NOSVE-NEXT:    fcvt h0, s0
1883; NONEON-NOSVE-NEXT:    str h0, [sp, #84]
1884; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
1885; NONEON-NOSVE-NEXT:    fcvt s0, h0
1886; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1887; NONEON-NOSVE-NEXT:    ldr h1, [sp, #32]
1888; NONEON-NOSVE-NEXT:    fcvt s1, h1
1889; NONEON-NOSVE-NEXT:    fcvt h0, s0
1890; NONEON-NOSVE-NEXT:    str h0, [sp, #82]
1891; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
1892; NONEON-NOSVE-NEXT:    fcvt s0, h0
1893; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1894; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
1895; NONEON-NOSVE-NEXT:    fcvt s1, h1
1896; NONEON-NOSVE-NEXT:    fcvt h0, s0
1897; NONEON-NOSVE-NEXT:    str h0, [sp, #80]
1898; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
1899; NONEON-NOSVE-NEXT:    fcvt s0, h0
1900; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1901; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
1902; NONEON-NOSVE-NEXT:    fcvt s1, h1
1903; NONEON-NOSVE-NEXT:    fcvt h0, s0
1904; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
1905; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
1906; NONEON-NOSVE-NEXT:    fcvt s0, h0
1907; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1908; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
1909; NONEON-NOSVE-NEXT:    fcvt s1, h1
1910; NONEON-NOSVE-NEXT:    fcvt h0, s0
1911; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
1912; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
1913; NONEON-NOSVE-NEXT:    fcvt s0, h0
1914; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1915; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
1916; NONEON-NOSVE-NEXT:    fcvt s1, h1
1917; NONEON-NOSVE-NEXT:    fcvt h0, s0
1918; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
1919; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
1920; NONEON-NOSVE-NEXT:    fcvt s0, h0
1921; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1922; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
1923; NONEON-NOSVE-NEXT:    fcvt s1, h1
1924; NONEON-NOSVE-NEXT:    fcvt h0, s0
1925; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
1926; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
1927; NONEON-NOSVE-NEXT:    fcvt s0, h0
1928; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1929; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
1930; NONEON-NOSVE-NEXT:    fcvt s1, h1
1931; NONEON-NOSVE-NEXT:    fcvt h0, s0
1932; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
1933; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
1934; NONEON-NOSVE-NEXT:    fcvt s0, h0
1935; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1936; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
1937; NONEON-NOSVE-NEXT:    fcvt s1, h1
1938; NONEON-NOSVE-NEXT:    fcvt h0, s0
1939; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
1940; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
1941; NONEON-NOSVE-NEXT:    fcvt s0, h0
1942; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1943; NONEON-NOSVE-NEXT:    ldr h1, [sp]
1944; NONEON-NOSVE-NEXT:    fcvt s1, h1
1945; NONEON-NOSVE-NEXT:    fcvt h0, s0
1946; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
1947; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
1948; NONEON-NOSVE-NEXT:    fcvt s0, h0
1949; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1950; NONEON-NOSVE-NEXT:    fcvt h0, s0
1951; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
1952; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
1953; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
1954; NONEON-NOSVE-NEXT:    add sp, sp, #96
1955; NONEON-NOSVE-NEXT:    ret
1956  %op1 = load <16 x half>, ptr %a
1957  %op2 = load <16 x half>, ptr %b
1958  %res = fmul <16 x half> %op1, %op2
1959  store <16 x half> %res, ptr %a
1960  ret void
1961}
1962
1963define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
; Element-wise fmul of <2 x float>. With SVE or SME the expected code is one
; predicated fmul on two single lanes (ptrue p0.s, vl2). In the NONEON-NOSVE
; run the operands are stored to the stack and each lane uses a scalar fmul.
1964; CHECK-LABEL: fmul_v2f32:
1965; CHECK:       // %bb.0:
1966; CHECK-NEXT:    ptrue p0.s, vl2
1967; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
1968; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
1969; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
1970; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1971; CHECK-NEXT:    ret
1972;
1973; NONEON-NOSVE-LABEL: fmul_v2f32:
1974; NONEON-NOSVE:       // %bb.0:
1975; NONEON-NOSVE-NEXT:    sub sp, sp, #32
1976; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
1977; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
1978; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
1979; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
1980; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
1981; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
1982; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
1983; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #24]
1984; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
1985; NONEON-NOSVE-NEXT:    add sp, sp, #32
1986; NONEON-NOSVE-NEXT:    ret
1987  %res = fmul <2 x float> %op1, %op2
1988  ret <2 x float> %res
1989}
1990
1991define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
; Element-wise fmul of <4 x float>. With SVE or SME the expected code is one
; predicated fmul on four single lanes (ptrue p0.s, vl4). In the NONEON-NOSVE
; run both operands are pushed to the stack and each lane uses a scalar fmul.
1992; CHECK-LABEL: fmul_v4f32:
1993; CHECK:       // %bb.0:
1994; CHECK-NEXT:    ptrue p0.s, vl4
1995; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
1996; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
1997; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
1998; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1999; CHECK-NEXT:    ret
2000;
2001; NONEON-NOSVE-LABEL: fmul_v4f32:
2002; NONEON-NOSVE:       // %bb.0:
2003; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
2004; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
2005; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
2006; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
2007; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
2008; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
2009; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
2010; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
2011; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #40]
2012; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
2013; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
2014; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
2015; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
2016; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #32]
2017; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
2018; NONEON-NOSVE-NEXT:    add sp, sp, #48
2019; NONEON-NOSVE-NEXT:    ret
2020  %res = fmul <4 x float> %op1, %op2
2021  ret <4 x float> %res
2022}
2023
2024define void @fmul_v8f32(ptr %a, ptr %b) {
; Element-wise fmul of <8 x float> values loaded from %a and %b, with the
; result stored back to %a. With SVE or SME the 256-bit vectors are handled as
; two 128-bit halves, each with a predicated fmul (ptrue p0.s, vl4). In the
; NONEON-NOSVE run all eight lanes use scalar fmul on stack copies.
2025; CHECK-LABEL: fmul_v8f32:
2026; CHECK:       // %bb.0:
2027; CHECK-NEXT:    ldp q0, q3, [x1]
2028; CHECK-NEXT:    ptrue p0.s, vl4
2029; CHECK-NEXT:    ldp q1, q2, [x0]
2030; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
2031; CHECK-NEXT:    movprfx z1, z2
2032; CHECK-NEXT:    fmul z1.s, p0/m, z1.s, z3.s
2033; CHECK-NEXT:    stp q0, q1, [x0]
2034; CHECK-NEXT:    ret
2035;
2036; NONEON-NOSVE-LABEL: fmul_v8f32:
2037; NONEON-NOSVE:       // %bb.0:
2038; NONEON-NOSVE-NEXT:    sub sp, sp, #96
2039; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
2040; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
2041; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
2042; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
2043; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
2044; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #40]
2045; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
2046; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
2047; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
2048; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
2049; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #32]
2050; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #88]
2051; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
2052; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
2053; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
2054; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
2055; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
2056; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #80]
2057; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
2058; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
2059; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
2060; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
2061; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
2062; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #72]
2063; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
2064; NONEON-NOSVE-NEXT:    fmul s3, s2, s0
2065; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
2066; NONEON-NOSVE-NEXT:    fmul s0, s1, s0
2067; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #64]
2068; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
2069; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2070; NONEON-NOSVE-NEXT:    add sp, sp, #96
2071; NONEON-NOSVE-NEXT:    ret
2072  %op1 = load <8 x float>, ptr %a
2073  %op2 = load <8 x float>, ptr %b
2074  %res = fmul <8 x float> %op1, %op2
2075  store <8 x float> %res, ptr %a
2076  ret void
2077}
2078
2079define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
; Element-wise fmul of <2 x double>. With SVE or SME the expected code is one
; predicated fmul on two double lanes (ptrue p0.d, vl2). In the NONEON-NOSVE
; run both operands are pushed to the stack and each lane uses a scalar fmul.
2080; CHECK-LABEL: fmul_v2f64:
2081; CHECK:       // %bb.0:
2082; CHECK-NEXT:    ptrue p0.d, vl2
2083; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2084; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
2085; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
2086; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2087; CHECK-NEXT:    ret
2088;
2089; NONEON-NOSVE-LABEL: fmul_v2f64:
2090; NONEON-NOSVE:       // %bb.0:
2091; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
2092; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
2093; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
2094; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
2095; NONEON-NOSVE-NEXT:    fmul d3, d2, d0
2096; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
2097; NONEON-NOSVE-NEXT:    fmul d0, d1, d0
2098; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #32]
2099; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
2100; NONEON-NOSVE-NEXT:    add sp, sp, #48
2101; NONEON-NOSVE-NEXT:    ret
2102  %res = fmul <2 x double> %op1, %op2
2103  ret <2 x double> %res
2104}
2105
2106define void @fmul_v4f64(ptr %a, ptr %b) {
; Element-wise fmul of <4 x double> values loaded from %a and %b, with the
; result stored back to %a. With SVE or SME the 256-bit vectors are handled as
; two 128-bit halves, each with a predicated fmul (ptrue p0.d, vl2). In the
; NONEON-NOSVE run all four lanes use scalar fmul on stack copies.
2107; CHECK-LABEL: fmul_v4f64:
2108; CHECK:       // %bb.0:
2109; CHECK-NEXT:    ldp q0, q3, [x1]
2110; CHECK-NEXT:    ptrue p0.d, vl2
2111; CHECK-NEXT:    ldp q1, q2, [x0]
2112; CHECK-NEXT:    fmul z0.d, p0/m, z0.d, z1.d
2113; CHECK-NEXT:    movprfx z1, z2
2114; CHECK-NEXT:    fmul z1.d, p0/m, z1.d, z3.d
2115; CHECK-NEXT:    stp q0, q1, [x0]
2116; CHECK-NEXT:    ret
2117;
2118; NONEON-NOSVE-LABEL: fmul_v4f64:
2119; NONEON-NOSVE:       // %bb.0:
2120; NONEON-NOSVE-NEXT:    sub sp, sp, #96
2121; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
2122; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
2123; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
2124; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
2125; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
2126; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #32]
2127; NONEON-NOSVE-NEXT:    ldr d0, [sp, #56]
2128; NONEON-NOSVE-NEXT:    fmul d3, d2, d0
2129; NONEON-NOSVE-NEXT:    ldr d0, [sp, #48]
2130; NONEON-NOSVE-NEXT:    fmul d0, d1, d0
2131; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
2132; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #80]
2133; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
2134; NONEON-NOSVE-NEXT:    fmul d3, d2, d0
2135; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
2136; NONEON-NOSVE-NEXT:    fmul d0, d1, d0
2137; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #64]
2138; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
2139; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2140; NONEON-NOSVE-NEXT:    add sp, sp, #96
2141; NONEON-NOSVE-NEXT:    ret
2142  %op1 = load <4 x double>, ptr %a
2143  %op2 = load <4 x double>, ptr %b
2144  %res = fmul <4 x double> %op1, %op2
2145  store <4 x double> %res, ptr %a
2146  ret void
2147}
2148
2149;
2150; FNEG
2151;
2152
2153define <2 x half> @fneg_v2f16(<2 x half> %op) {
; Element-wise fneg of <2 x half>. With SVE or SME the expected code is one
; predicated fneg using a four-lane half predicate (ptrue p0.h, vl4). In the
; NONEON-NOSVE run four half lanes are each moved to a general-purpose
; register, have bit 15 flipped with eor #0x8000, and are stored back.
2154; CHECK-LABEL: fneg_v2f16:
2155; CHECK:       // %bb.0:
2156; CHECK-NEXT:    ptrue p0.h, vl4
2157; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2158; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
2159; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2160; CHECK-NEXT:    ret
2161;
2162; NONEON-NOSVE-LABEL: fneg_v2f16:
2163; NONEON-NOSVE:       // %bb.0:
2164; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
2165; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2166; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2167; NONEON-NOSVE-NEXT:    fmov w8, s0
2168; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2169; NONEON-NOSVE-NEXT:    fmov s0, w8
2170; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
2171; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2172; NONEON-NOSVE-NEXT:    fmov w8, s0
2173; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2174; NONEON-NOSVE-NEXT:    fmov s0, w8
2175; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
2176; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2177; NONEON-NOSVE-NEXT:    fmov w8, s0
2178; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2179; NONEON-NOSVE-NEXT:    fmov s0, w8
2180; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
2181; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2182; NONEON-NOSVE-NEXT:    fmov w8, s0
2183; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2184; NONEON-NOSVE-NEXT:    fmov s0, w8
2185; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
2186; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2187; NONEON-NOSVE-NEXT:    add sp, sp, #16
2188; NONEON-NOSVE-NEXT:    ret
2189  %res = fneg <2 x half> %op
2190  ret <2 x half> %res
2191}
2192
2193define <4 x half> @fneg_v4f16(<4 x half> %op) {
; Element-wise fneg of <4 x half>. With SVE or SME the expected code is one
; predicated fneg on four half lanes (ptrue p0.h, vl4). In the NONEON-NOSVE
; run each lane is moved to a general-purpose register, has bit 15 flipped
; with eor #0x8000, and is stored back.
2194; CHECK-LABEL: fneg_v4f16:
2195; CHECK:       // %bb.0:
2196; CHECK-NEXT:    ptrue p0.h, vl4
2197; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2198; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
2199; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2200; CHECK-NEXT:    ret
2201;
2202; NONEON-NOSVE-LABEL: fneg_v4f16:
2203; NONEON-NOSVE:       // %bb.0:
2204; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
2205; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2206; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2207; NONEON-NOSVE-NEXT:    fmov w8, s0
2208; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2209; NONEON-NOSVE-NEXT:    fmov s0, w8
2210; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
2211; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2212; NONEON-NOSVE-NEXT:    fmov w8, s0
2213; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2214; NONEON-NOSVE-NEXT:    fmov s0, w8
2215; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
2216; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2217; NONEON-NOSVE-NEXT:    fmov w8, s0
2218; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2219; NONEON-NOSVE-NEXT:    fmov s0, w8
2220; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
2221; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2222; NONEON-NOSVE-NEXT:    fmov w8, s0
2223; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2224; NONEON-NOSVE-NEXT:    fmov s0, w8
2225; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
2226; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2227; NONEON-NOSVE-NEXT:    add sp, sp, #16
2228; NONEON-NOSVE-NEXT:    ret
2229  %res = fneg <4 x half> %op
2230  ret <4 x half> %res
2231}
2232
2233define <8 x half> @fneg_v8f16(<8 x half> %op) {
; Element-wise fneg of <8 x half>. With SVE or SME the expected code is one
; predicated fneg on eight half lanes (ptrue p0.h, vl8). In the NONEON-NOSVE
; run the operand is pushed to the stack and each of the eight lanes has
; bit 15 flipped in a general-purpose register with eor #0x8000.
2234; CHECK-LABEL: fneg_v8f16:
2235; CHECK:       // %bb.0:
2236; CHECK-NEXT:    ptrue p0.h, vl8
2237; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2238; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
2239; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2240; CHECK-NEXT:    ret
2241;
2242; NONEON-NOSVE-LABEL: fneg_v8f16:
2243; NONEON-NOSVE:       // %bb.0:
2244; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
2245; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2246; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
2247; NONEON-NOSVE-NEXT:    fmov w8, s0
2248; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2249; NONEON-NOSVE-NEXT:    fmov s0, w8
2250; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
2251; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
2252; NONEON-NOSVE-NEXT:    fmov w8, s0
2253; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2254; NONEON-NOSVE-NEXT:    fmov s0, w8
2255; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
2256; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
2257; NONEON-NOSVE-NEXT:    fmov w8, s0
2258; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2259; NONEON-NOSVE-NEXT:    fmov s0, w8
2260; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
2261; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
2262; NONEON-NOSVE-NEXT:    fmov w8, s0
2263; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2264; NONEON-NOSVE-NEXT:    fmov s0, w8
2265; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
2266; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2267; NONEON-NOSVE-NEXT:    fmov w8, s0
2268; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2269; NONEON-NOSVE-NEXT:    fmov s0, w8
2270; NONEON-NOSVE-NEXT:    str h0, [sp, #22]
2271; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2272; NONEON-NOSVE-NEXT:    fmov w8, s0
2273; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2274; NONEON-NOSVE-NEXT:    fmov s0, w8
2275; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
2276; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2277; NONEON-NOSVE-NEXT:    fmov w8, s0
2278; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2279; NONEON-NOSVE-NEXT:    fmov s0, w8
2280; NONEON-NOSVE-NEXT:    str h0, [sp, #18]
2281; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2282; NONEON-NOSVE-NEXT:    fmov w8, s0
2283; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2284; NONEON-NOSVE-NEXT:    fmov s0, w8
2285; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
2286; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
2287; NONEON-NOSVE-NEXT:    add sp, sp, #32
2288; NONEON-NOSVE-NEXT:    ret
2289  %res = fneg <8 x half> %op
2290  ret <8 x half> %res
2291}
2292
2293define void @fneg_v16f16(ptr %a, ptr %b) {
; Element-wise fneg of a <16 x half> value loaded from %a, with the result
; stored back to %a. The %b parameter is not referenced in the body. With SVE
; or SME the 256-bit vector is handled as two 128-bit halves, each with a
; predicated fneg (ptrue p0.h, vl8). In the NONEON-NOSVE run all sixteen lanes
; have bit 15 flipped in a general-purpose register with eor #0x8000.
2294; CHECK-LABEL: fneg_v16f16:
2295; CHECK:       // %bb.0:
2296; CHECK-NEXT:    ldp q0, q1, [x0]
2297; CHECK-NEXT:    ptrue p0.h, vl8
2298; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
2299; CHECK-NEXT:    fneg z1.h, p0/m, z1.h
2300; CHECK-NEXT:    stp q0, q1, [x0]
2301; CHECK-NEXT:    ret
2302;
2303; NONEON-NOSVE-LABEL: fneg_v16f16:
2304; NONEON-NOSVE:       // %bb.0:
2305; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2306; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
2307; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
2308; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
2309; NONEON-NOSVE-NEXT:    fmov w8, s0
2310; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2311; NONEON-NOSVE-NEXT:    fmov s0, w8
2312; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
2313; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
2314; NONEON-NOSVE-NEXT:    fmov w8, s0
2315; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2316; NONEON-NOSVE-NEXT:    fmov s0, w8
2317; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
2318; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
2319; NONEON-NOSVE-NEXT:    fmov w8, s0
2320; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2321; NONEON-NOSVE-NEXT:    fmov s0, w8
2322; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
2323; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
2324; NONEON-NOSVE-NEXT:    fmov w8, s0
2325; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2326; NONEON-NOSVE-NEXT:    fmov s0, w8
2327; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
2328; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
2329; NONEON-NOSVE-NEXT:    fmov w8, s0
2330; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2331; NONEON-NOSVE-NEXT:    fmov s0, w8
2332; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
2333; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
2334; NONEON-NOSVE-NEXT:    fmov w8, s0
2335; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2336; NONEON-NOSVE-NEXT:    fmov s0, w8
2337; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
2338; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
2339; NONEON-NOSVE-NEXT:    fmov w8, s0
2340; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2341; NONEON-NOSVE-NEXT:    fmov s0, w8
2342; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
2343; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
2344; NONEON-NOSVE-NEXT:    fmov w8, s0
2345; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2346; NONEON-NOSVE-NEXT:    fmov s0, w8
2347; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
2348; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
2349; NONEON-NOSVE-NEXT:    fmov w8, s0
2350; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2351; NONEON-NOSVE-NEXT:    fmov s0, w8
2352; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
2353; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
2354; NONEON-NOSVE-NEXT:    fmov w8, s0
2355; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2356; NONEON-NOSVE-NEXT:    fmov s0, w8
2357; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
2358; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
2359; NONEON-NOSVE-NEXT:    fmov w8, s0
2360; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2361; NONEON-NOSVE-NEXT:    fmov s0, w8
2362; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
2363; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
2364; NONEON-NOSVE-NEXT:    fmov w8, s0
2365; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2366; NONEON-NOSVE-NEXT:    fmov s0, w8
2367; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
2368; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2369; NONEON-NOSVE-NEXT:    fmov w8, s0
2370; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2371; NONEON-NOSVE-NEXT:    fmov s0, w8
2372; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
2373; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2374; NONEON-NOSVE-NEXT:    fmov w8, s0
2375; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2376; NONEON-NOSVE-NEXT:    fmov s0, w8
2377; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
2378; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2379; NONEON-NOSVE-NEXT:    fmov w8, s0
2380; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2381; NONEON-NOSVE-NEXT:    fmov s0, w8
2382; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
2383; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2384; NONEON-NOSVE-NEXT:    fmov w8, s0
2385; NONEON-NOSVE-NEXT:    eor w8, w8, #0x8000
2386; NONEON-NOSVE-NEXT:    fmov s0, w8
2387; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
2388; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
2389; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2390; NONEON-NOSVE-NEXT:    add sp, sp, #64
2391; NONEON-NOSVE-NEXT:    ret
2392  %op = load <16 x half>, ptr %a
2393  %res = fneg <16 x half> %op
2394  store <16 x half> %res, ptr %a
2395  ret void
2396}
2397
; fneg of a by-value <2 x float>.
; SVE/SME runs: one predicated `fneg z0.s` under a `ptrue p0.s, vl2` predicate;
; the kill annotations show d0 being reinterpreted as z0 and back.
; No-SVE run: the argument is stored to a 16-byte stack slot, each of the two
; lanes is negated with scalar `fneg`, and the result is reloaded into d0.
; The assertions are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script instead of editing them by hand.
2398define <2 x float> @fneg_v2f32(<2 x float> %op) {
2399; CHECK-LABEL: fneg_v2f32:
2400; CHECK:       // %bb.0:
2401; CHECK-NEXT:    ptrue p0.s, vl2
2402; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2403; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
2404; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2405; CHECK-NEXT:    ret
2406;
2407; NONEON-NOSVE-LABEL: fneg_v2f32:
2408; NONEON-NOSVE:       // %bb.0:
2409; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
2410; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2411; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
2412; NONEON-NOSVE-NEXT:    fneg s1, s0
2413; NONEON-NOSVE-NEXT:    ldr s0, [sp]
2414; NONEON-NOSVE-NEXT:    fneg s0, s0
2415; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #8]
2416; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2417; NONEON-NOSVE-NEXT:    add sp, sp, #16
2418; NONEON-NOSVE-NEXT:    ret
2419  %res = fneg <2 x float> %op
2420  ret <2 x float> %res
2421}
2422
; fneg of a by-value <4 x float>.
; SVE/SME runs: one predicated `fneg z0.s` under a `ptrue p0.s, vl4` predicate,
; with q0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: q0 is stored to the stack, the four lanes are negated one at a
; time with scalar `fneg` (stored back in pairs via `stp`), and the result is
; reloaded into q0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2423define <4 x float> @fneg_v4f32(<4 x float> %op) {
2424; CHECK-LABEL: fneg_v4f32:
2425; CHECK:       // %bb.0:
2426; CHECK-NEXT:    ptrue p0.s, vl4
2427; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2428; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
2429; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2430; CHECK-NEXT:    ret
2431;
2432; NONEON-NOSVE-LABEL: fneg_v4f32:
2433; NONEON-NOSVE:       // %bb.0:
2434; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
2435; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2436; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
2437; NONEON-NOSVE-NEXT:    fneg s1, s0
2438; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
2439; NONEON-NOSVE-NEXT:    fneg s0, s0
2440; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
2441; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
2442; NONEON-NOSVE-NEXT:    fneg s1, s0
2443; NONEON-NOSVE-NEXT:    ldr s0, [sp]
2444; NONEON-NOSVE-NEXT:    fneg s0, s0
2445; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
2446; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
2447; NONEON-NOSVE-NEXT:    add sp, sp, #32
2448; NONEON-NOSVE-NEXT:    ret
2449  %res = fneg <4 x float> %op
2450  ret <4 x float> %res
2451}
2452
; In-place fneg of the <8 x float> stored at %a (load, negate, store back).
; SVE/SME runs: the 256-bit value is handled as two 128-bit halves: one `ldp`
; of q0/q1, two predicated `fneg z*.s` under a shared `ptrue p0.s, vl4`, and
; one `stp` back to [x0].
; No-SVE run: both halves are copied to a 64-byte stack frame, all eight lanes
; are negated with scalar `fneg`, and the result is written back through x0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2453define void @fneg_v8f32(ptr %a) {
2454; CHECK-LABEL: fneg_v8f32:
2455; CHECK:       // %bb.0:
2456; CHECK-NEXT:    ldp q0, q1, [x0]
2457; CHECK-NEXT:    ptrue p0.s, vl4
2458; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
2459; CHECK-NEXT:    fneg z1.s, p0/m, z1.s
2460; CHECK-NEXT:    stp q0, q1, [x0]
2461; CHECK-NEXT:    ret
2462;
2463; NONEON-NOSVE-LABEL: fneg_v8f32:
2464; NONEON-NOSVE:       // %bb.0:
2465; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2466; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
2467; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
2468; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
2469; NONEON-NOSVE-NEXT:    fneg s1, s0
2470; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
2471; NONEON-NOSVE-NEXT:    fneg s0, s0
2472; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
2473; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
2474; NONEON-NOSVE-NEXT:    fneg s1, s0
2475; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
2476; NONEON-NOSVE-NEXT:    fneg s0, s0
2477; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
2478; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
2479; NONEON-NOSVE-NEXT:    fneg s1, s0
2480; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
2481; NONEON-NOSVE-NEXT:    fneg s0, s0
2482; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
2483; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
2484; NONEON-NOSVE-NEXT:    fneg s1, s0
2485; NONEON-NOSVE-NEXT:    ldr s0, [sp]
2486; NONEON-NOSVE-NEXT:    fneg s0, s0
2487; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
2488; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
2489; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2490; NONEON-NOSVE-NEXT:    add sp, sp, #64
2491; NONEON-NOSVE-NEXT:    ret
2492  %op = load <8 x float>, ptr %a
2493  %res = fneg <8 x float> %op
2494  store <8 x float> %res, ptr %a
2495  ret void
2496}
2497
; fneg of a by-value <2 x double>.
; SVE/SME runs: one predicated `fneg z0.d` under a `ptrue p0.d, vl2` predicate,
; with q0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: q0 is stored to the stack, both lanes are negated with scalar
; `fneg` on d registers, and the pair is reloaded into q0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2498define <2 x double> @fneg_v2f64(<2 x double> %op) {
2499; CHECK-LABEL: fneg_v2f64:
2500; CHECK:       // %bb.0:
2501; CHECK-NEXT:    ptrue p0.d, vl2
2502; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2503; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
2504; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2505; CHECK-NEXT:    ret
2506;
2507; NONEON-NOSVE-LABEL: fneg_v2f64:
2508; NONEON-NOSVE:       // %bb.0:
2509; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
2510; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2511; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2512; NONEON-NOSVE-NEXT:    fneg d1, d0
2513; NONEON-NOSVE-NEXT:    ldr d0, [sp]
2514; NONEON-NOSVE-NEXT:    fneg d0, d0
2515; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
2516; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
2517; NONEON-NOSVE-NEXT:    add sp, sp, #32
2518; NONEON-NOSVE-NEXT:    ret
2519  %res = fneg <2 x double> %op
2520  ret <2 x double> %res
2521}
2522
; In-place fneg of the <4 x double> stored at %a (load, negate, store back).
; SVE/SME runs: the value is handled as two 128-bit halves: `ldp` of q0/q1,
; two predicated `fneg z*.d` under a shared `ptrue p0.d, vl2`, then `stp`.
; No-SVE run: both halves are copied to a 64-byte stack frame, the four lanes
; are negated with scalar `fneg`, and the result is written back through x0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2523define void @fneg_v4f64(ptr %a) {
2524; CHECK-LABEL: fneg_v4f64:
2525; CHECK:       // %bb.0:
2526; CHECK-NEXT:    ldp q0, q1, [x0]
2527; CHECK-NEXT:    ptrue p0.d, vl2
2528; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
2529; CHECK-NEXT:    fneg z1.d, p0/m, z1.d
2530; CHECK-NEXT:    stp q0, q1, [x0]
2531; CHECK-NEXT:    ret
2532;
2533; NONEON-NOSVE-LABEL: fneg_v4f64:
2534; NONEON-NOSVE:       // %bb.0:
2535; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2536; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
2537; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
2538; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
2539; NONEON-NOSVE-NEXT:    fneg d1, d0
2540; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
2541; NONEON-NOSVE-NEXT:    fneg d0, d0
2542; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
2543; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2544; NONEON-NOSVE-NEXT:    fneg d1, d0
2545; NONEON-NOSVE-NEXT:    ldr d0, [sp]
2546; NONEON-NOSVE-NEXT:    fneg d0, d0
2547; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
2548; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
2549; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2550; NONEON-NOSVE-NEXT:    add sp, sp, #64
2551; NONEON-NOSVE-NEXT:    ret
2552  %op = load <4 x double>, ptr %a
2553  %res = fneg <4 x double> %op
2554  store <4 x double> %res, ptr %a
2555  ret void
2556}
2557
2558;
2559; FSQRT
2560;
2561
; llvm.sqrt on a by-value <2 x half>.
; SVE/SME runs: one predicated `fsqrt z0.h` under a `ptrue p0.h, vl4`
; predicate. Note the predicate covers four half lanes although the IR type
; has two elements (presumably the vector is widened to <4 x half> during
; legalisation; confirm against the backend if this matters).
; No-SVE run: d0 is stored to the stack and four half lanes are processed one
; at a time: widen to single (`fcvt s0, h0`), scalar `fsqrt`, narrow back to
; half (`fcvt h0, s0`), store; the result is then reloaded into d0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2562define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
2563; CHECK-LABEL: fsqrt_v2f16:
2564; CHECK:       // %bb.0:
2565; CHECK-NEXT:    ptrue p0.h, vl4
2566; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2567; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
2568; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2569; CHECK-NEXT:    ret
2570;
2571; NONEON-NOSVE-LABEL: fsqrt_v2f16:
2572; NONEON-NOSVE:       // %bb.0:
2573; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
2574; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2575; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2576; NONEON-NOSVE-NEXT:    fcvt s0, h0
2577; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2578; NONEON-NOSVE-NEXT:    fcvt h0, s0
2579; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
2580; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2581; NONEON-NOSVE-NEXT:    fcvt s0, h0
2582; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2583; NONEON-NOSVE-NEXT:    fcvt h0, s0
2584; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
2585; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2586; NONEON-NOSVE-NEXT:    fcvt s0, h0
2587; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2588; NONEON-NOSVE-NEXT:    fcvt h0, s0
2589; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
2590; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2591; NONEON-NOSVE-NEXT:    fcvt s0, h0
2592; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2593; NONEON-NOSVE-NEXT:    fcvt h0, s0
2594; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
2595; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2596; NONEON-NOSVE-NEXT:    add sp, sp, #16
2597; NONEON-NOSVE-NEXT:    ret
2598  %res = call <2 x half> @llvm.sqrt.v2f16(<2 x half> %op)
2599  ret <2 x half> %res
2600}
2601
; llvm.sqrt on a by-value <4 x half>.
; SVE/SME runs: one predicated `fsqrt z0.h` under a `ptrue p0.h, vl4`
; predicate, with d0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: d0 is stored to the stack and each of the four half lanes is
; widened to single (`fcvt s0, h0`), square-rooted with scalar `fsqrt`,
; narrowed back to half and stored; the result is reloaded into d0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2602define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
2603; CHECK-LABEL: fsqrt_v4f16:
2604; CHECK:       // %bb.0:
2605; CHECK-NEXT:    ptrue p0.h, vl4
2606; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2607; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
2608; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2609; CHECK-NEXT:    ret
2610;
2611; NONEON-NOSVE-LABEL: fsqrt_v4f16:
2612; NONEON-NOSVE:       // %bb.0:
2613; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
2614; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2615; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2616; NONEON-NOSVE-NEXT:    fcvt s0, h0
2617; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2618; NONEON-NOSVE-NEXT:    fcvt h0, s0
2619; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
2620; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2621; NONEON-NOSVE-NEXT:    fcvt s0, h0
2622; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2623; NONEON-NOSVE-NEXT:    fcvt h0, s0
2624; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
2625; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2626; NONEON-NOSVE-NEXT:    fcvt s0, h0
2627; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2628; NONEON-NOSVE-NEXT:    fcvt h0, s0
2629; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
2630; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2631; NONEON-NOSVE-NEXT:    fcvt s0, h0
2632; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2633; NONEON-NOSVE-NEXT:    fcvt h0, s0
2634; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
2635; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2636; NONEON-NOSVE-NEXT:    add sp, sp, #16
2637; NONEON-NOSVE-NEXT:    ret
2638  %res = call <4 x half> @llvm.sqrt.v4f16(<4 x half> %op)
2639  ret <4 x half> %res
2640}
2641
; llvm.sqrt on a by-value <8 x half>.
; SVE/SME runs: one predicated `fsqrt z0.h` under a `ptrue p0.h, vl8`
; predicate, with q0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: q0 is stored to the stack and each of the eight half lanes is
; widened to single (`fcvt s0, h0`), square-rooted with scalar `fsqrt`,
; narrowed back to half and stored; the result is reloaded into q0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2642define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
2643; CHECK-LABEL: fsqrt_v8f16:
2644; CHECK:       // %bb.0:
2645; CHECK-NEXT:    ptrue p0.h, vl8
2646; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2647; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
2648; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2649; CHECK-NEXT:    ret
2650;
2651; NONEON-NOSVE-LABEL: fsqrt_v8f16:
2652; NONEON-NOSVE:       // %bb.0:
2653; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
2654; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2655; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
2656; NONEON-NOSVE-NEXT:    fcvt s0, h0
2657; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2658; NONEON-NOSVE-NEXT:    fcvt h0, s0
2659; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
2660; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
2661; NONEON-NOSVE-NEXT:    fcvt s0, h0
2662; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2663; NONEON-NOSVE-NEXT:    fcvt h0, s0
2664; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
2665; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
2666; NONEON-NOSVE-NEXT:    fcvt s0, h0
2667; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2668; NONEON-NOSVE-NEXT:    fcvt h0, s0
2669; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
2670; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
2671; NONEON-NOSVE-NEXT:    fcvt s0, h0
2672; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2673; NONEON-NOSVE-NEXT:    fcvt h0, s0
2674; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
2675; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2676; NONEON-NOSVE-NEXT:    fcvt s0, h0
2677; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2678; NONEON-NOSVE-NEXT:    fcvt h0, s0
2679; NONEON-NOSVE-NEXT:    str h0, [sp, #22]
2680; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2681; NONEON-NOSVE-NEXT:    fcvt s0, h0
2682; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2683; NONEON-NOSVE-NEXT:    fcvt h0, s0
2684; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
2685; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2686; NONEON-NOSVE-NEXT:    fcvt s0, h0
2687; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2688; NONEON-NOSVE-NEXT:    fcvt h0, s0
2689; NONEON-NOSVE-NEXT:    str h0, [sp, #18]
2690; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2691; NONEON-NOSVE-NEXT:    fcvt s0, h0
2692; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2693; NONEON-NOSVE-NEXT:    fcvt h0, s0
2694; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
2695; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
2696; NONEON-NOSVE-NEXT:    add sp, sp, #32
2697; NONEON-NOSVE-NEXT:    ret
2698  %res = call <8 x half> @llvm.sqrt.v8f16(<8 x half> %op)
2699  ret <8 x half> %res
2700}
2701
; In-place llvm.sqrt of the <16 x half> stored at %a (load, sqrt, store back).
; NOTE(review): the second parameter %b is never referenced in the body; it
; looks like a leftover from the two-operand tests. Dropping it would change
; the signature, so it is only flagged here.
; SVE/SME runs: the value is handled as two 128-bit halves: `ldp` of q0/q1,
; two predicated `fsqrt z*.h` under a shared `ptrue p0.h, vl8`, then `stp`.
; No-SVE run: both halves are copied to a 64-byte stack frame and each of the
; sixteen half lanes is widened to single, square-rooted with scalar `fsqrt`,
; narrowed back to half and stored; the result is written back through x0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2702define void @fsqrt_v16f16(ptr %a, ptr %b) {
2703; CHECK-LABEL: fsqrt_v16f16:
2704; CHECK:       // %bb.0:
2705; CHECK-NEXT:    ldp q0, q1, [x0]
2706; CHECK-NEXT:    ptrue p0.h, vl8
2707; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
2708; CHECK-NEXT:    fsqrt z1.h, p0/m, z1.h
2709; CHECK-NEXT:    stp q0, q1, [x0]
2710; CHECK-NEXT:    ret
2711;
2712; NONEON-NOSVE-LABEL: fsqrt_v16f16:
2713; NONEON-NOSVE:       // %bb.0:
2714; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2715; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
2716; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
2717; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
2718; NONEON-NOSVE-NEXT:    fcvt s0, h0
2719; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2720; NONEON-NOSVE-NEXT:    fcvt h0, s0
2721; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
2722; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
2723; NONEON-NOSVE-NEXT:    fcvt s0, h0
2724; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2725; NONEON-NOSVE-NEXT:    fcvt h0, s0
2726; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
2727; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
2728; NONEON-NOSVE-NEXT:    fcvt s0, h0
2729; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2730; NONEON-NOSVE-NEXT:    fcvt h0, s0
2731; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
2732; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
2733; NONEON-NOSVE-NEXT:    fcvt s0, h0
2734; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2735; NONEON-NOSVE-NEXT:    fcvt h0, s0
2736; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
2737; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
2738; NONEON-NOSVE-NEXT:    fcvt s0, h0
2739; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2740; NONEON-NOSVE-NEXT:    fcvt h0, s0
2741; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
2742; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
2743; NONEON-NOSVE-NEXT:    fcvt s0, h0
2744; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2745; NONEON-NOSVE-NEXT:    fcvt h0, s0
2746; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
2747; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
2748; NONEON-NOSVE-NEXT:    fcvt s0, h0
2749; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2750; NONEON-NOSVE-NEXT:    fcvt h0, s0
2751; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
2752; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
2753; NONEON-NOSVE-NEXT:    fcvt s0, h0
2754; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2755; NONEON-NOSVE-NEXT:    fcvt h0, s0
2756; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
2757; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
2758; NONEON-NOSVE-NEXT:    fcvt s0, h0
2759; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2760; NONEON-NOSVE-NEXT:    fcvt h0, s0
2761; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
2762; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
2763; NONEON-NOSVE-NEXT:    fcvt s0, h0
2764; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2765; NONEON-NOSVE-NEXT:    fcvt h0, s0
2766; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
2767; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
2768; NONEON-NOSVE-NEXT:    fcvt s0, h0
2769; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2770; NONEON-NOSVE-NEXT:    fcvt h0, s0
2771; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
2772; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
2773; NONEON-NOSVE-NEXT:    fcvt s0, h0
2774; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2775; NONEON-NOSVE-NEXT:    fcvt h0, s0
2776; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
2777; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
2778; NONEON-NOSVE-NEXT:    fcvt s0, h0
2779; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2780; NONEON-NOSVE-NEXT:    fcvt h0, s0
2781; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
2782; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
2783; NONEON-NOSVE-NEXT:    fcvt s0, h0
2784; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2785; NONEON-NOSVE-NEXT:    fcvt h0, s0
2786; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
2787; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
2788; NONEON-NOSVE-NEXT:    fcvt s0, h0
2789; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2790; NONEON-NOSVE-NEXT:    fcvt h0, s0
2791; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
2792; NONEON-NOSVE-NEXT:    ldr h0, [sp]
2793; NONEON-NOSVE-NEXT:    fcvt s0, h0
2794; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2795; NONEON-NOSVE-NEXT:    fcvt h0, s0
2796; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
2797; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
2798; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2799; NONEON-NOSVE-NEXT:    add sp, sp, #64
2800; NONEON-NOSVE-NEXT:    ret
2801  %op = load <16 x half>, ptr %a
2802  %res = call <16 x half> @llvm.sqrt.v16f16(<16 x half> %op)
2803  store <16 x half> %res, ptr %a
2804  ret void
2805}
2806
; llvm.sqrt on a by-value <2 x float>.
; SVE/SME runs: one predicated `fsqrt z0.s` under a `ptrue p0.s, vl2`
; predicate, with d0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: d0 is stored to the stack, both lanes go through scalar `fsqrt`
; directly (no widening needed for single precision), and the pair is
; reloaded into d0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2807define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
2808; CHECK-LABEL: fsqrt_v2f32:
2809; CHECK:       // %bb.0:
2810; CHECK-NEXT:    ptrue p0.s, vl2
2811; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2812; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
2813; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2814; CHECK-NEXT:    ret
2815;
2816; NONEON-NOSVE-LABEL: fsqrt_v2f32:
2817; NONEON-NOSVE:       // %bb.0:
2818; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
2819; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
2820; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
2821; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2822; NONEON-NOSVE-NEXT:    ldr s0, [sp]
2823; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2824; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #8]
2825; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2826; NONEON-NOSVE-NEXT:    add sp, sp, #16
2827; NONEON-NOSVE-NEXT:    ret
2828  %res = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %op)
2829  ret <2 x float> %res
2830}
2831
; llvm.sqrt on a by-value <4 x float>.
; SVE/SME runs: one predicated `fsqrt z0.s` under a `ptrue p0.s, vl4`
; predicate, with q0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: q0 is stored to the stack, the four lanes go through scalar
; `fsqrt` (stored back in pairs via `stp`), and the result is reloaded into q0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2832define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
2833; CHECK-LABEL: fsqrt_v4f32:
2834; CHECK:       // %bb.0:
2835; CHECK-NEXT:    ptrue p0.s, vl4
2836; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2837; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
2838; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2839; CHECK-NEXT:    ret
2840;
2841; NONEON-NOSVE-LABEL: fsqrt_v4f32:
2842; NONEON-NOSVE:       // %bb.0:
2843; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
2844; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2845; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
2846; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2847; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
2848; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2849; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
2850; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
2851; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2852; NONEON-NOSVE-NEXT:    ldr s0, [sp]
2853; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2854; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
2855; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
2856; NONEON-NOSVE-NEXT:    add sp, sp, #32
2857; NONEON-NOSVE-NEXT:    ret
2858  %res = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %op)
2859  ret <4 x float> %res
2860}
2861
; In-place llvm.sqrt of the <8 x float> stored at %a (load, sqrt, store back).
; SVE/SME runs: the value is handled as two 128-bit halves: `ldp` of q0/q1,
; two predicated `fsqrt z*.s` under a shared `ptrue p0.s, vl4`, then `stp`.
; No-SVE run: both halves are copied to a 64-byte stack frame, all eight lanes
; go through scalar `fsqrt`, and the result is written back through x0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2862define void @fsqrt_v8f32(ptr %a) {
2863; CHECK-LABEL: fsqrt_v8f32:
2864; CHECK:       // %bb.0:
2865; CHECK-NEXT:    ldp q0, q1, [x0]
2866; CHECK-NEXT:    ptrue p0.s, vl4
2867; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
2868; CHECK-NEXT:    fsqrt z1.s, p0/m, z1.s
2869; CHECK-NEXT:    stp q0, q1, [x0]
2870; CHECK-NEXT:    ret
2871;
2872; NONEON-NOSVE-LABEL: fsqrt_v8f32:
2873; NONEON-NOSVE:       // %bb.0:
2874; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2875; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
2876; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
2877; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
2878; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2879; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
2880; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2881; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
2882; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
2883; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2884; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
2885; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2886; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
2887; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
2888; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2889; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
2890; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2891; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
2892; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
2893; NONEON-NOSVE-NEXT:    fsqrt s1, s0
2894; NONEON-NOSVE-NEXT:    ldr s0, [sp]
2895; NONEON-NOSVE-NEXT:    fsqrt s0, s0
2896; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
2897; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
2898; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2899; NONEON-NOSVE-NEXT:    add sp, sp, #64
2900; NONEON-NOSVE-NEXT:    ret
2901  %op = load <8 x float>, ptr %a
2902  %res = call <8 x float> @llvm.sqrt.v8f32(<8 x float> %op)
2903  store <8 x float> %res, ptr %a
2904  ret void
2905}
2906
; llvm.sqrt on a by-value <2 x double>.
; SVE/SME runs: one predicated `fsqrt z0.d` under a `ptrue p0.d, vl2`
; predicate, with q0 reinterpreted as z0 (see the kill annotations).
; No-SVE run: q0 is stored to the stack, both lanes go through scalar `fsqrt`
; on d registers, and the pair is reloaded into q0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2907define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
2908; CHECK-LABEL: fsqrt_v2f64:
2909; CHECK:       // %bb.0:
2910; CHECK-NEXT:    ptrue p0.d, vl2
2911; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
2912; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
2913; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2914; CHECK-NEXT:    ret
2915;
2916; NONEON-NOSVE-LABEL: fsqrt_v2f64:
2917; NONEON-NOSVE:       // %bb.0:
2918; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
2919; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2920; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2921; NONEON-NOSVE-NEXT:    fsqrt d1, d0
2922; NONEON-NOSVE-NEXT:    ldr d0, [sp]
2923; NONEON-NOSVE-NEXT:    fsqrt d0, d0
2924; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
2925; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
2926; NONEON-NOSVE-NEXT:    add sp, sp, #32
2927; NONEON-NOSVE-NEXT:    ret
2928  %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %op)
2929  ret <2 x double> %res
2930}
2931
; In-place llvm.sqrt of the <4 x double> stored at %a (load, sqrt, store back).
; SVE/SME runs: the value is handled as two 128-bit halves: `ldp` of q0/q1,
; two predicated `fsqrt z*.d` under a shared `ptrue p0.d, vl2`, then `stp`.
; No-SVE run: both halves are copied to a 64-byte stack frame, the four lanes
; go through scalar `fsqrt`, and the result is written back through x0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2932define void @fsqrt_v4f64(ptr %a) {
2933; CHECK-LABEL: fsqrt_v4f64:
2934; CHECK:       // %bb.0:
2935; CHECK-NEXT:    ldp q0, q1, [x0]
2936; CHECK-NEXT:    ptrue p0.d, vl2
2937; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
2938; CHECK-NEXT:    fsqrt z1.d, p0/m, z1.d
2939; CHECK-NEXT:    stp q0, q1, [x0]
2940; CHECK-NEXT:    ret
2941;
2942; NONEON-NOSVE-LABEL: fsqrt_v4f64:
2943; NONEON-NOSVE:       // %bb.0:
2944; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2945; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
2946; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
2947; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
2948; NONEON-NOSVE-NEXT:    fsqrt d1, d0
2949; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
2950; NONEON-NOSVE-NEXT:    fsqrt d0, d0
2951; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
2952; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
2953; NONEON-NOSVE-NEXT:    fsqrt d1, d0
2954; NONEON-NOSVE-NEXT:    ldr d0, [sp]
2955; NONEON-NOSVE-NEXT:    fsqrt d0, d0
2956; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
2957; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
2958; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
2959; NONEON-NOSVE-NEXT:    add sp, sp, #64
2960; NONEON-NOSVE-NEXT:    ret
2961  %op = load <4 x double>, ptr %a
2962  %res = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %op)
2963  store <4 x double> %res, ptr %a
2964  ret void
2965}
2966
2967;
2968; FSUB
2969;
2970
; Element-wise %op1 - %op2 on by-value <2 x half> operands.
; SVE/SME runs: one predicated `fsub z0.h, p0/m, z0.h, z1.h` under a
; `ptrue p0.h, vl4` predicate. Note the predicate covers four half lanes
; although the IR type has two elements (presumably the operands are widened
; to <4 x half> during legalisation; confirm against the backend if needed).
; No-SVE run: d0 (%op1) and d1 (%op2) are stored side by side on the stack;
; four lanes are processed, each widening both inputs to single, computing
; `fsub s0, s1, s0` (op1 lane minus op2 lane), narrowing back to half and
; storing; the result is reloaded into d0.
; Assertions are autogenerated; regenerate rather than hand-edit.
2971define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
2972; CHECK-LABEL: fsub_v2f16:
2973; CHECK:       // %bb.0:
2974; CHECK-NEXT:    ptrue p0.h, vl4
2975; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
2976; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
2977; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
2978; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2979; CHECK-NEXT:    ret
2980;
2981; NONEON-NOSVE-LABEL: fsub_v2f16:
2982; NONEON-NOSVE:       // %bb.0:
2983; NONEON-NOSVE-NEXT:    sub sp, sp, #32
2984; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
2985; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
2986; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
2987; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
2988; NONEON-NOSVE-NEXT:    fcvt s0, h0
2989; NONEON-NOSVE-NEXT:    fcvt s1, h1
2990; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
2991; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
2992; NONEON-NOSVE-NEXT:    fcvt s1, h1
2993; NONEON-NOSVE-NEXT:    fcvt h0, s0
2994; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
2995; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
2996; NONEON-NOSVE-NEXT:    fcvt s0, h0
2997; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
2998; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
2999; NONEON-NOSVE-NEXT:    fcvt s1, h1
3000; NONEON-NOSVE-NEXT:    fcvt h0, s0
3001; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
3002; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
3003; NONEON-NOSVE-NEXT:    fcvt s0, h0
3004; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3005; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
3006; NONEON-NOSVE-NEXT:    fcvt s1, h1
3007; NONEON-NOSVE-NEXT:    fcvt h0, s0
3008; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
3009; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
3010; NONEON-NOSVE-NEXT:    fcvt s0, h0
3011; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3012; NONEON-NOSVE-NEXT:    fcvt h0, s0
3013; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
3014; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
3015; NONEON-NOSVE-NEXT:    add sp, sp, #32
3016; NONEON-NOSVE-NEXT:    ret
3017  %res = fsub <2 x half> %op1, %op2
3018  ret <2 x half> %res
3019}
3020
; Element-wise %op1 - %op2 on by-value <4 x half> operands.
; SVE/SME runs: one predicated `fsub z0.h, p0/m, z0.h, z1.h` under a
; `ptrue p0.h, vl4` predicate, with d0/d1 reinterpreted as z0/z1.
; No-SVE run: d0 (%op1) and d1 (%op2) are stored side by side on the stack;
; each of the four lanes widens both inputs to single, computes
; `fsub s0, s1, s0` (op1 lane minus op2 lane), narrows back to half and
; stores; the result is reloaded into d0.
; Assertions are autogenerated; regenerate rather than hand-edit.
3021define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
3022; CHECK-LABEL: fsub_v4f16:
3023; CHECK:       // %bb.0:
3024; CHECK-NEXT:    ptrue p0.h, vl4
3025; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
3026; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
3027; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
3028; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
3029; CHECK-NEXT:    ret
3030;
3031; NONEON-NOSVE-LABEL: fsub_v4f16:
3032; NONEON-NOSVE:       // %bb.0:
3033; NONEON-NOSVE-NEXT:    sub sp, sp, #32
3034; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
3035; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
3036; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
3037; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
3038; NONEON-NOSVE-NEXT:    fcvt s0, h0
3039; NONEON-NOSVE-NEXT:    fcvt s1, h1
3040; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3041; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
3042; NONEON-NOSVE-NEXT:    fcvt s1, h1
3043; NONEON-NOSVE-NEXT:    fcvt h0, s0
3044; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
3045; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
3046; NONEON-NOSVE-NEXT:    fcvt s0, h0
3047; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3048; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
3049; NONEON-NOSVE-NEXT:    fcvt s1, h1
3050; NONEON-NOSVE-NEXT:    fcvt h0, s0
3051; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
3052; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
3053; NONEON-NOSVE-NEXT:    fcvt s0, h0
3054; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3055; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
3056; NONEON-NOSVE-NEXT:    fcvt s1, h1
3057; NONEON-NOSVE-NEXT:    fcvt h0, s0
3058; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
3059; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
3060; NONEON-NOSVE-NEXT:    fcvt s0, h0
3061; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3062; NONEON-NOSVE-NEXT:    fcvt h0, s0
3063; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
3064; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
3065; NONEON-NOSVE-NEXT:    add sp, sp, #32
3066; NONEON-NOSVE-NEXT:    ret
3067  %res = fsub <4 x half> %op1, %op2
3068  ret <4 x half> %res
3069}
3070
; Element-wise %op1 - %op2 on by-value <8 x half> operands.
; SVE/SME runs: one predicated `fsub z0.h, p0/m, z0.h, z1.h` under a
; `ptrue p0.h, vl8` predicate, with q0/q1 reinterpreted as z0/z1.
; No-SVE run: q0 (%op1) and q1 (%op2) are pushed onto a 48-byte stack frame;
; each of the eight lanes widens both inputs to single, computes
; `fsub s0, s1, s0` (op1 lane minus op2 lane), narrows back to half and
; stores; the result is reloaded into q0.
; Assertions are autogenerated; regenerate rather than hand-edit.
3071define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
3072; CHECK-LABEL: fsub_v8f16:
3073; CHECK:       // %bb.0:
3074; CHECK-NEXT:    ptrue p0.h, vl8
3075; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
3076; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
3077; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
3078; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3079; CHECK-NEXT:    ret
3080;
3081; NONEON-NOSVE-LABEL: fsub_v8f16:
3082; NONEON-NOSVE:       // %bb.0:
3083; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
3084; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
3085; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
3086; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
3087; NONEON-NOSVE-NEXT:    fcvt s0, h0
3088; NONEON-NOSVE-NEXT:    fcvt s1, h1
3089; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3090; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
3091; NONEON-NOSVE-NEXT:    fcvt s1, h1
3092; NONEON-NOSVE-NEXT:    fcvt h0, s0
3093; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
3094; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
3095; NONEON-NOSVE-NEXT:    fcvt s0, h0
3096; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3097; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
3098; NONEON-NOSVE-NEXT:    fcvt s1, h1
3099; NONEON-NOSVE-NEXT:    fcvt h0, s0
3100; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
3101; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
3102; NONEON-NOSVE-NEXT:    fcvt s0, h0
3103; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3104; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
3105; NONEON-NOSVE-NEXT:    fcvt s1, h1
3106; NONEON-NOSVE-NEXT:    fcvt h0, s0
3107; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
3108; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
3109; NONEON-NOSVE-NEXT:    fcvt s0, h0
3110; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3111; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
3112; NONEON-NOSVE-NEXT:    fcvt s1, h1
3113; NONEON-NOSVE-NEXT:    fcvt h0, s0
3114; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
3115; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
3116; NONEON-NOSVE-NEXT:    fcvt s0, h0
3117; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3118; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
3119; NONEON-NOSVE-NEXT:    fcvt s1, h1
3120; NONEON-NOSVE-NEXT:    fcvt h0, s0
3121; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
3122; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
3123; NONEON-NOSVE-NEXT:    fcvt s0, h0
3124; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3125; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
3126; NONEON-NOSVE-NEXT:    fcvt s1, h1
3127; NONEON-NOSVE-NEXT:    fcvt h0, s0
3128; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
3129; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
3130; NONEON-NOSVE-NEXT:    fcvt s0, h0
3131; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3132; NONEON-NOSVE-NEXT:    ldr h1, [sp]
3133; NONEON-NOSVE-NEXT:    fcvt s1, h1
3134; NONEON-NOSVE-NEXT:    fcvt h0, s0
3135; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
3136; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
3137; NONEON-NOSVE-NEXT:    fcvt s0, h0
3138; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3139; NONEON-NOSVE-NEXT:    fcvt h0, s0
3140; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
3141; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
3142; NONEON-NOSVE-NEXT:    add sp, sp, #48
3143; NONEON-NOSVE-NEXT:    ret
3144  %res = fsub <8 x half> %op1, %op2
3145  ret <8 x half> %res
3146}
3147
; Elementwise fsub of two <16 x half> vectors loaded from %a and %b; the
; difference is stored back to %a.
; The SVE/SME runs expect the two 128-bit halves to be handled separately under
; a vl8 .h predicate (one fsubr, then movprfx + fsub).
; The run with neither feature enabled expects full scalarization through the
; stack: every lane is widened with fcvt, subtracted in single precision and
; narrowed back to half before the result is reloaded as two q registers.
3148define void @fsub_v16f16(ptr %a, ptr %b) {
3149; CHECK-LABEL: fsub_v16f16:
3150; CHECK:       // %bb.0:
3151; CHECK-NEXT:    ldp q0, q3, [x1]
3152; CHECK-NEXT:    ptrue p0.h, vl8
3153; CHECK-NEXT:    ldp q1, q2, [x0]
3154; CHECK-NEXT:    fsubr z0.h, p0/m, z0.h, z1.h
3155; CHECK-NEXT:    movprfx z1, z2
3156; CHECK-NEXT:    fsub z1.h, p0/m, z1.h, z3.h
3157; CHECK-NEXT:    stp q0, q1, [x0]
3158; CHECK-NEXT:    ret
3159;
3160; NONEON-NOSVE-LABEL: fsub_v16f16:
3161; NONEON-NOSVE:       // %bb.0:
3162; NONEON-NOSVE-NEXT:    sub sp, sp, #96
3163; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
3164; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
3165; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
3166; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
3167; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
3168; NONEON-NOSVE-NEXT:    ldr h0, [sp, #62]
3169; NONEON-NOSVE-NEXT:    ldr h1, [sp, #46]
3170; NONEON-NOSVE-NEXT:    fcvt s0, h0
3171; NONEON-NOSVE-NEXT:    fcvt s1, h1
3172; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3173; NONEON-NOSVE-NEXT:    ldr h1, [sp, #44]
3174; NONEON-NOSVE-NEXT:    fcvt s1, h1
3175; NONEON-NOSVE-NEXT:    fcvt h0, s0
3176; NONEON-NOSVE-NEXT:    str h0, [sp, #94]
3177; NONEON-NOSVE-NEXT:    ldr h0, [sp, #60]
3178; NONEON-NOSVE-NEXT:    fcvt s0, h0
3179; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3180; NONEON-NOSVE-NEXT:    ldr h1, [sp, #42]
3181; NONEON-NOSVE-NEXT:    fcvt s1, h1
3182; NONEON-NOSVE-NEXT:    fcvt h0, s0
3183; NONEON-NOSVE-NEXT:    str h0, [sp, #92]
3184; NONEON-NOSVE-NEXT:    ldr h0, [sp, #58]
3185; NONEON-NOSVE-NEXT:    fcvt s0, h0
3186; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3187; NONEON-NOSVE-NEXT:    ldr h1, [sp, #40]
3188; NONEON-NOSVE-NEXT:    fcvt s1, h1
3189; NONEON-NOSVE-NEXT:    fcvt h0, s0
3190; NONEON-NOSVE-NEXT:    str h0, [sp, #90]
3191; NONEON-NOSVE-NEXT:    ldr h0, [sp, #56]
3192; NONEON-NOSVE-NEXT:    fcvt s0, h0
3193; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3194; NONEON-NOSVE-NEXT:    ldr h1, [sp, #38]
3195; NONEON-NOSVE-NEXT:    fcvt s1, h1
3196; NONEON-NOSVE-NEXT:    fcvt h0, s0
3197; NONEON-NOSVE-NEXT:    str h0, [sp, #88]
3198; NONEON-NOSVE-NEXT:    ldr h0, [sp, #54]
3199; NONEON-NOSVE-NEXT:    fcvt s0, h0
3200; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3201; NONEON-NOSVE-NEXT:    ldr h1, [sp, #36]
3202; NONEON-NOSVE-NEXT:    fcvt s1, h1
3203; NONEON-NOSVE-NEXT:    fcvt h0, s0
3204; NONEON-NOSVE-NEXT:    str h0, [sp, #86]
3205; NONEON-NOSVE-NEXT:    ldr h0, [sp, #52]
3206; NONEON-NOSVE-NEXT:    fcvt s0, h0
3207; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3208; NONEON-NOSVE-NEXT:    ldr h1, [sp, #34]
3209; NONEON-NOSVE-NEXT:    fcvt s1, h1
3210; NONEON-NOSVE-NEXT:    fcvt h0, s0
3211; NONEON-NOSVE-NEXT:    str h0, [sp, #84]
3212; NONEON-NOSVE-NEXT:    ldr h0, [sp, #50]
3213; NONEON-NOSVE-NEXT:    fcvt s0, h0
3214; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3215; NONEON-NOSVE-NEXT:    ldr h1, [sp, #32]
3216; NONEON-NOSVE-NEXT:    fcvt s1, h1
3217; NONEON-NOSVE-NEXT:    fcvt h0, s0
3218; NONEON-NOSVE-NEXT:    str h0, [sp, #82]
3219; NONEON-NOSVE-NEXT:    ldr h0, [sp, #48]
3220; NONEON-NOSVE-NEXT:    fcvt s0, h0
3221; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3222; NONEON-NOSVE-NEXT:    ldr h1, [sp, #14]
3223; NONEON-NOSVE-NEXT:    fcvt s1, h1
3224; NONEON-NOSVE-NEXT:    fcvt h0, s0
3225; NONEON-NOSVE-NEXT:    str h0, [sp, #80]
3226; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
3227; NONEON-NOSVE-NEXT:    fcvt s0, h0
3228; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3229; NONEON-NOSVE-NEXT:    ldr h1, [sp, #12]
3230; NONEON-NOSVE-NEXT:    fcvt s1, h1
3231; NONEON-NOSVE-NEXT:    fcvt h0, s0
3232; NONEON-NOSVE-NEXT:    str h0, [sp, #78]
3233; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
3234; NONEON-NOSVE-NEXT:    fcvt s0, h0
3235; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3236; NONEON-NOSVE-NEXT:    ldr h1, [sp, #10]
3237; NONEON-NOSVE-NEXT:    fcvt s1, h1
3238; NONEON-NOSVE-NEXT:    fcvt h0, s0
3239; NONEON-NOSVE-NEXT:    str h0, [sp, #76]
3240; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
3241; NONEON-NOSVE-NEXT:    fcvt s0, h0
3242; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3243; NONEON-NOSVE-NEXT:    ldr h1, [sp, #8]
3244; NONEON-NOSVE-NEXT:    fcvt s1, h1
3245; NONEON-NOSVE-NEXT:    fcvt h0, s0
3246; NONEON-NOSVE-NEXT:    str h0, [sp, #74]
3247; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
3248; NONEON-NOSVE-NEXT:    fcvt s0, h0
3249; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3250; NONEON-NOSVE-NEXT:    ldr h1, [sp, #6]
3251; NONEON-NOSVE-NEXT:    fcvt s1, h1
3252; NONEON-NOSVE-NEXT:    fcvt h0, s0
3253; NONEON-NOSVE-NEXT:    str h0, [sp, #72]
3254; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
3255; NONEON-NOSVE-NEXT:    fcvt s0, h0
3256; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3257; NONEON-NOSVE-NEXT:    ldr h1, [sp, #4]
3258; NONEON-NOSVE-NEXT:    fcvt s1, h1
3259; NONEON-NOSVE-NEXT:    fcvt h0, s0
3260; NONEON-NOSVE-NEXT:    str h0, [sp, #70]
3261; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
3262; NONEON-NOSVE-NEXT:    fcvt s0, h0
3263; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3264; NONEON-NOSVE-NEXT:    ldr h1, [sp, #2]
3265; NONEON-NOSVE-NEXT:    fcvt s1, h1
3266; NONEON-NOSVE-NEXT:    fcvt h0, s0
3267; NONEON-NOSVE-NEXT:    str h0, [sp, #68]
3268; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
3269; NONEON-NOSVE-NEXT:    fcvt s0, h0
3270; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3271; NONEON-NOSVE-NEXT:    ldr h1, [sp]
3272; NONEON-NOSVE-NEXT:    fcvt s1, h1
3273; NONEON-NOSVE-NEXT:    fcvt h0, s0
3274; NONEON-NOSVE-NEXT:    str h0, [sp, #66]
3275; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
3276; NONEON-NOSVE-NEXT:    fcvt s0, h0
3277; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3278; NONEON-NOSVE-NEXT:    fcvt h0, s0
3279; NONEON-NOSVE-NEXT:    str h0, [sp, #64]
3280; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
3281; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
3282; NONEON-NOSVE-NEXT:    add sp, sp, #96
3283; NONEON-NOSVE-NEXT:    ret
3284  %op1 = load <16 x half>, ptr %a
3285  %op2 = load <16 x half>, ptr %b
3286  %res = fsub <16 x half> %op1, %op2
3287  store <16 x half> %res, ptr %a
3288  ret void
3289}
3290
; Elementwise fsub of two <2 x float> arguments, returned by value.
; The SVE/SME runs expect a single predicated fsub on .s lanes under a vl2
; predicate. The run with neither feature enabled expects both operands to be
; spilled and subtracted lane by lane with scalar fsub, with the result
; reloaded from the stack into d0.
3291define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
3292; CHECK-LABEL: fsub_v2f32:
3293; CHECK:       // %bb.0:
3294; CHECK-NEXT:    ptrue p0.s, vl2
3295; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
3296; CHECK-NEXT:    // kill: def $d1 killed $d1 def $z1
3297; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
3298; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
3299; CHECK-NEXT:    ret
3300;
3301; NONEON-NOSVE-LABEL: fsub_v2f32:
3302; NONEON-NOSVE:       // %bb.0:
3303; NONEON-NOSVE-NEXT:    sub sp, sp, #32
3304; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
3305; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
3306; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
3307; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
3308; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3309; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
3310; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3311; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #24]
3312; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
3313; NONEON-NOSVE-NEXT:    add sp, sp, #32
3314; NONEON-NOSVE-NEXT:    ret
3315  %res = fsub <2 x float> %op1, %op2
3316  ret <2 x float> %res
3317}
3318
; Elementwise fsub of two <4 x float> arguments, returned by value.
; The SVE/SME runs expect a single predicated fsub on .s lanes under a vl4
; predicate. The run with neither feature enabled expects q0/q1 to be spilled
; with a pre-indexed stp and the four lanes subtracted with scalar fsub, two
; lanes per stp of results, before the vector is reloaded into q0.
3319define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
3320; CHECK-LABEL: fsub_v4f32:
3321; CHECK:       // %bb.0:
3322; CHECK-NEXT:    ptrue p0.s, vl4
3323; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
3324; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
3325; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
3326; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3327; CHECK-NEXT:    ret
3328;
3329; NONEON-NOSVE-LABEL: fsub_v4f32:
3330; NONEON-NOSVE:       // %bb.0:
3331; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
3332; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
3333; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
3334; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
3335; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3336; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
3337; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3338; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
3339; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #40]
3340; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
3341; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3342; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
3343; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3344; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #32]
3345; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
3346; NONEON-NOSVE-NEXT:    add sp, sp, #48
3347; NONEON-NOSVE-NEXT:    ret
3348  %res = fsub <4 x float> %op1, %op2
3349  ret <4 x float> %res
3350}
3351
; Elementwise fsub of two <8 x float> vectors loaded from %a and %b; the
; difference is stored back to %a.
; The SVE/SME runs expect the two 128-bit halves to be handled separately under
; a vl4 .s predicate (one fsubr, then movprfx + fsub). The run with neither
; feature enabled expects all four q registers to be spilled and the eight
; lanes subtracted with scalar fsub before the result is reloaded and stored.
3352define void @fsub_v8f32(ptr %a, ptr %b) {
3353; CHECK-LABEL: fsub_v8f32:
3354; CHECK:       // %bb.0:
3355; CHECK-NEXT:    ldp q0, q3, [x1]
3356; CHECK-NEXT:    ptrue p0.s, vl4
3357; CHECK-NEXT:    ldp q1, q2, [x0]
3358; CHECK-NEXT:    fsubr z0.s, p0/m, z0.s, z1.s
3359; CHECK-NEXT:    movprfx z1, z2
3360; CHECK-NEXT:    fsub z1.s, p0/m, z1.s, z3.s
3361; CHECK-NEXT:    stp q0, q1, [x0]
3362; CHECK-NEXT:    ret
3363;
3364; NONEON-NOSVE-LABEL: fsub_v8f32:
3365; NONEON-NOSVE:       // %bb.0:
3366; NONEON-NOSVE-NEXT:    sub sp, sp, #96
3367; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
3368; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
3369; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
3370; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
3371; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
3372; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #40]
3373; NONEON-NOSVE-NEXT:    ldr s0, [sp, #60]
3374; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3375; NONEON-NOSVE-NEXT:    ldr s0, [sp, #56]
3376; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3377; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #32]
3378; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #88]
3379; NONEON-NOSVE-NEXT:    ldr s0, [sp, #52]
3380; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3381; NONEON-NOSVE-NEXT:    ldr s0, [sp, #48]
3382; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3383; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp, #8]
3384; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #80]
3385; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
3386; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3387; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
3388; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3389; NONEON-NOSVE-NEXT:    ldp s1, s2, [sp]
3390; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #72]
3391; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
3392; NONEON-NOSVE-NEXT:    fsub s3, s2, s0
3393; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
3394; NONEON-NOSVE-NEXT:    fsub s0, s1, s0
3395; NONEON-NOSVE-NEXT:    stp s0, s3, [sp, #64]
3396; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
3397; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
3398; NONEON-NOSVE-NEXT:    add sp, sp, #96
3399; NONEON-NOSVE-NEXT:    ret
3400  %op1 = load <8 x float>, ptr %a
3401  %op2 = load <8 x float>, ptr %b
3402  %res = fsub <8 x float> %op1, %op2
3403  store <8 x float> %res, ptr %a
3404  ret void
3405}
3406
; Elementwise fsub of two <2 x double> arguments, returned by value.
; The SVE/SME runs expect a single predicated fsub on .d lanes under a vl2
; predicate. The run with neither feature enabled expects q0/q1 to be spilled
; and the two lanes subtracted with scalar fsub on d registers before the
; result is reloaded into q0.
3407define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
3408; CHECK-LABEL: fsub_v2f64:
3409; CHECK:       // %bb.0:
3410; CHECK-NEXT:    ptrue p0.d, vl2
3411; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
3412; CHECK-NEXT:    // kill: def $q1 killed $q1 def $z1
3413; CHECK-NEXT:    fsub z0.d, p0/m, z0.d, z1.d
3414; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3415; CHECK-NEXT:    ret
3416;
3417; NONEON-NOSVE-LABEL: fsub_v2f64:
3418; NONEON-NOSVE:       // %bb.0:
3419; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-48]!
3420; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 48
3421; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
3422; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
3423; NONEON-NOSVE-NEXT:    fsub d3, d2, d0
3424; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
3425; NONEON-NOSVE-NEXT:    fsub d0, d1, d0
3426; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #32]
3427; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
3428; NONEON-NOSVE-NEXT:    add sp, sp, #48
3429; NONEON-NOSVE-NEXT:    ret
3430  %res = fsub <2 x double> %op1, %op2
3431  ret <2 x double> %res
3432}
3433
; Elementwise fsub of two <4 x double> vectors loaded from %a and %b; the
; difference is stored back to %a.
; The SVE/SME runs expect the two 128-bit halves to be handled separately under
; a vl2 .d predicate (one fsubr, then movprfx + fsub). The run with neither
; feature enabled expects the operands to be spilled and the four lanes
; subtracted with scalar fsub on d registers.
3434define void @fsub_v4f64(ptr %a, ptr %b) {
3435; CHECK-LABEL: fsub_v4f64:
3436; CHECK:       // %bb.0:
3437; CHECK-NEXT:    ldp q0, q3, [x1]
3438; CHECK-NEXT:    ptrue p0.d, vl2
3439; CHECK-NEXT:    ldp q1, q2, [x0]
3440; CHECK-NEXT:    fsubr z0.d, p0/m, z0.d, z1.d
3441; CHECK-NEXT:    movprfx z1, z2
3442; CHECK-NEXT:    fsub z1.d, p0/m, z1.d, z3.d
3443; CHECK-NEXT:    stp q0, q1, [x0]
3444; CHECK-NEXT:    ret
3445;
3446; NONEON-NOSVE-LABEL: fsub_v4f64:
3447; NONEON-NOSVE:       // %bb.0:
3448; NONEON-NOSVE-NEXT:    sub sp, sp, #96
3449; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
3450; NONEON-NOSVE-NEXT:    ldp q3, q0, [x1]
3451; NONEON-NOSVE-NEXT:    ldp q2, q1, [x0]
3452; NONEON-NOSVE-NEXT:    stp q2, q3, [sp]
3453; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
3454; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp, #32]
3455; NONEON-NOSVE-NEXT:    ldr d0, [sp, #56]
3456; NONEON-NOSVE-NEXT:    fsub d3, d2, d0
3457; NONEON-NOSVE-NEXT:    ldr d0, [sp, #48]
3458; NONEON-NOSVE-NEXT:    fsub d0, d1, d0
3459; NONEON-NOSVE-NEXT:    ldp d1, d2, [sp]
3460; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #80]
3461; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
3462; NONEON-NOSVE-NEXT:    fsub d3, d2, d0
3463; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
3464; NONEON-NOSVE-NEXT:    fsub d0, d1, d0
3465; NONEON-NOSVE-NEXT:    stp d0, d3, [sp, #64]
3466; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #64]
3467; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
3468; NONEON-NOSVE-NEXT:    add sp, sp, #96
3469; NONEON-NOSVE-NEXT:    ret
3470  %op1 = load <4 x double>, ptr %a
3471  %op2 = load <4 x double>, ptr %b
3472  %res = fsub <4 x double> %op1, %op2
3473  store <4 x double> %res, ptr %a
3474  ret void
3475}
3476
3477;
3478; FABS
3479;
3480
; llvm.fabs on a <2 x half> argument, returned by value.
; The SVE/SME runs expect a single predicated fabs on .h lanes; note that the
; expected predicate is vl4 even though the IR type has two lanes.
; The run with neither feature enabled expects d0 to be spilled and the sign
; bit cleared in integer registers (and with #0x7fff) for each of the four
; halfword slots at offsets 6, 4, 2 and 0, then the result reloaded into d0.
3481define <2 x half> @fabs_v2f16(<2 x half> %op) {
3482; CHECK-LABEL: fabs_v2f16:
3483; CHECK:       // %bb.0:
3484; CHECK-NEXT:    ptrue p0.h, vl4
3485; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
3486; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
3487; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
3488; CHECK-NEXT:    ret
3489;
3490; NONEON-NOSVE-LABEL: fabs_v2f16:
3491; NONEON-NOSVE:       // %bb.0:
3492; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
3493; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
3494; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
3495; NONEON-NOSVE-NEXT:    fmov w8, s0
3496; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3497; NONEON-NOSVE-NEXT:    fmov s0, w8
3498; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
3499; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
3500; NONEON-NOSVE-NEXT:    fmov w8, s0
3501; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3502; NONEON-NOSVE-NEXT:    fmov s0, w8
3503; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
3504; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
3505; NONEON-NOSVE-NEXT:    fmov w8, s0
3506; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3507; NONEON-NOSVE-NEXT:    fmov s0, w8
3508; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
3509; NONEON-NOSVE-NEXT:    ldr h0, [sp]
3510; NONEON-NOSVE-NEXT:    fmov w8, s0
3511; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3512; NONEON-NOSVE-NEXT:    fmov s0, w8
3513; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
3514; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
3515; NONEON-NOSVE-NEXT:    add sp, sp, #16
3516; NONEON-NOSVE-NEXT:    ret
3517  %res = call <2 x half> @llvm.fabs.v2f16(<2 x half> %op)
3518  ret <2 x half> %res
3519}
3520
; llvm.fabs on a <4 x half> argument, returned by value.
; The SVE/SME runs expect a single predicated fabs on .h lanes under a vl4
; predicate. The run with neither feature enabled expects d0 to be spilled and
; the sign bit of each of the four lanes cleared in integer registers
; (and with #0x7fff), then the result reloaded into d0.
3521define <4 x half> @fabs_v4f16(<4 x half> %op) {
3522; CHECK-LABEL: fabs_v4f16:
3523; CHECK:       // %bb.0:
3524; CHECK-NEXT:    ptrue p0.h, vl4
3525; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
3526; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
3527; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
3528; CHECK-NEXT:    ret
3529;
3530; NONEON-NOSVE-LABEL: fabs_v4f16:
3531; NONEON-NOSVE:       // %bb.0:
3532; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
3533; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
3534; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
3535; NONEON-NOSVE-NEXT:    fmov w8, s0
3536; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3537; NONEON-NOSVE-NEXT:    fmov s0, w8
3538; NONEON-NOSVE-NEXT:    str h0, [sp, #14]
3539; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
3540; NONEON-NOSVE-NEXT:    fmov w8, s0
3541; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3542; NONEON-NOSVE-NEXT:    fmov s0, w8
3543; NONEON-NOSVE-NEXT:    str h0, [sp, #12]
3544; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
3545; NONEON-NOSVE-NEXT:    fmov w8, s0
3546; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3547; NONEON-NOSVE-NEXT:    fmov s0, w8
3548; NONEON-NOSVE-NEXT:    str h0, [sp, #10]
3549; NONEON-NOSVE-NEXT:    ldr h0, [sp]
3550; NONEON-NOSVE-NEXT:    fmov w8, s0
3551; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3552; NONEON-NOSVE-NEXT:    fmov s0, w8
3553; NONEON-NOSVE-NEXT:    str h0, [sp, #8]
3554; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
3555; NONEON-NOSVE-NEXT:    add sp, sp, #16
3556; NONEON-NOSVE-NEXT:    ret
3557  %res = call <4 x half> @llvm.fabs.v4f16(<4 x half> %op)
3558  ret <4 x half> %res
3559}
3560
; llvm.fabs on an <8 x half> argument, returned by value.
; The SVE/SME runs expect a single predicated fabs on .h lanes under a vl8
; predicate. The run with neither feature enabled expects q0 to be spilled and
; the sign bit of each of the eight lanes cleared in integer registers
; (and with #0x7fff), then the result reloaded into q0.
3561define <8 x half> @fabs_v8f16(<8 x half> %op) {
3562; CHECK-LABEL: fabs_v8f16:
3563; CHECK:       // %bb.0:
3564; CHECK-NEXT:    ptrue p0.h, vl8
3565; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
3566; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
3567; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3568; CHECK-NEXT:    ret
3569;
3570; NONEON-NOSVE-LABEL: fabs_v8f16:
3571; NONEON-NOSVE:       // %bb.0:
3572; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
3573; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
3574; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
3575; NONEON-NOSVE-NEXT:    fmov w8, s0
3576; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3577; NONEON-NOSVE-NEXT:    fmov s0, w8
3578; NONEON-NOSVE-NEXT:    str h0, [sp, #30]
3579; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
3580; NONEON-NOSVE-NEXT:    fmov w8, s0
3581; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3582; NONEON-NOSVE-NEXT:    fmov s0, w8
3583; NONEON-NOSVE-NEXT:    str h0, [sp, #28]
3584; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
3585; NONEON-NOSVE-NEXT:    fmov w8, s0
3586; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3587; NONEON-NOSVE-NEXT:    fmov s0, w8
3588; NONEON-NOSVE-NEXT:    str h0, [sp, #26]
3589; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
3590; NONEON-NOSVE-NEXT:    fmov w8, s0
3591; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3592; NONEON-NOSVE-NEXT:    fmov s0, w8
3593; NONEON-NOSVE-NEXT:    str h0, [sp, #24]
3594; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
3595; NONEON-NOSVE-NEXT:    fmov w8, s0
3596; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3597; NONEON-NOSVE-NEXT:    fmov s0, w8
3598; NONEON-NOSVE-NEXT:    str h0, [sp, #22]
3599; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
3600; NONEON-NOSVE-NEXT:    fmov w8, s0
3601; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3602; NONEON-NOSVE-NEXT:    fmov s0, w8
3603; NONEON-NOSVE-NEXT:    str h0, [sp, #20]
3604; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
3605; NONEON-NOSVE-NEXT:    fmov w8, s0
3606; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3607; NONEON-NOSVE-NEXT:    fmov s0, w8
3608; NONEON-NOSVE-NEXT:    str h0, [sp, #18]
3609; NONEON-NOSVE-NEXT:    ldr h0, [sp]
3610; NONEON-NOSVE-NEXT:    fmov w8, s0
3611; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3612; NONEON-NOSVE-NEXT:    fmov s0, w8
3613; NONEON-NOSVE-NEXT:    str h0, [sp, #16]
3614; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
3615; NONEON-NOSVE-NEXT:    add sp, sp, #32
3616; NONEON-NOSVE-NEXT:    ret
3617  %res = call <8 x half> @llvm.fabs.v8f16(<8 x half> %op)
3618  ret <8 x half> %res
3619}
3620
; llvm.fabs on a <16 x half> vector loaded from %a; the result is stored back
; to %a.
; The SVE/SME runs expect the two 128-bit halves to be handled by two
; predicated fabs instructions on .h lanes under a vl8 predicate.
; The run with neither feature enabled expects both q registers to be spilled
; and the sign bit of each of the sixteen lanes cleared in integer registers
; (and with #0x7fff) before the result is reloaded and stored.
3621define void @fabs_v16f16(ptr %a) {
3622; CHECK-LABEL: fabs_v16f16:
3623; CHECK:       // %bb.0:
3624; CHECK-NEXT:    ldp q0, q1, [x0]
3625; CHECK-NEXT:    ptrue p0.h, vl8
3626; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
3627; CHECK-NEXT:    fabs z1.h, p0/m, z1.h
3628; CHECK-NEXT:    stp q0, q1, [x0]
3629; CHECK-NEXT:    ret
3630;
3631; NONEON-NOSVE-LABEL: fabs_v16f16:
3632; NONEON-NOSVE:       // %bb.0:
3633; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
3634; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
3635; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
3636; NONEON-NOSVE-NEXT:    ldr h0, [sp, #30]
3637; NONEON-NOSVE-NEXT:    fmov w8, s0
3638; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3639; NONEON-NOSVE-NEXT:    fmov s0, w8
3640; NONEON-NOSVE-NEXT:    str h0, [sp, #62]
3641; NONEON-NOSVE-NEXT:    ldr h0, [sp, #28]
3642; NONEON-NOSVE-NEXT:    fmov w8, s0
3643; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3644; NONEON-NOSVE-NEXT:    fmov s0, w8
3645; NONEON-NOSVE-NEXT:    str h0, [sp, #60]
3646; NONEON-NOSVE-NEXT:    ldr h0, [sp, #26]
3647; NONEON-NOSVE-NEXT:    fmov w8, s0
3648; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3649; NONEON-NOSVE-NEXT:    fmov s0, w8
3650; NONEON-NOSVE-NEXT:    str h0, [sp, #58]
3651; NONEON-NOSVE-NEXT:    ldr h0, [sp, #24]
3652; NONEON-NOSVE-NEXT:    fmov w8, s0
3653; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3654; NONEON-NOSVE-NEXT:    fmov s0, w8
3655; NONEON-NOSVE-NEXT:    str h0, [sp, #56]
3656; NONEON-NOSVE-NEXT:    ldr h0, [sp, #22]
3657; NONEON-NOSVE-NEXT:    fmov w8, s0
3658; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3659; NONEON-NOSVE-NEXT:    fmov s0, w8
3660; NONEON-NOSVE-NEXT:    str h0, [sp, #54]
3661; NONEON-NOSVE-NEXT:    ldr h0, [sp, #20]
3662; NONEON-NOSVE-NEXT:    fmov w8, s0
3663; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3664; NONEON-NOSVE-NEXT:    fmov s0, w8
3665; NONEON-NOSVE-NEXT:    str h0, [sp, #52]
3666; NONEON-NOSVE-NEXT:    ldr h0, [sp, #18]
3667; NONEON-NOSVE-NEXT:    fmov w8, s0
3668; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3669; NONEON-NOSVE-NEXT:    fmov s0, w8
3670; NONEON-NOSVE-NEXT:    str h0, [sp, #50]
3671; NONEON-NOSVE-NEXT:    ldr h0, [sp, #16]
3672; NONEON-NOSVE-NEXT:    fmov w8, s0
3673; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3674; NONEON-NOSVE-NEXT:    fmov s0, w8
3675; NONEON-NOSVE-NEXT:    str h0, [sp, #48]
3676; NONEON-NOSVE-NEXT:    ldr h0, [sp, #14]
3677; NONEON-NOSVE-NEXT:    fmov w8, s0
3678; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3679; NONEON-NOSVE-NEXT:    fmov s0, w8
3680; NONEON-NOSVE-NEXT:    str h0, [sp, #46]
3681; NONEON-NOSVE-NEXT:    ldr h0, [sp, #12]
3682; NONEON-NOSVE-NEXT:    fmov w8, s0
3683; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3684; NONEON-NOSVE-NEXT:    fmov s0, w8
3685; NONEON-NOSVE-NEXT:    str h0, [sp, #44]
3686; NONEON-NOSVE-NEXT:    ldr h0, [sp, #10]
3687; NONEON-NOSVE-NEXT:    fmov w8, s0
3688; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3689; NONEON-NOSVE-NEXT:    fmov s0, w8
3690; NONEON-NOSVE-NEXT:    str h0, [sp, #42]
3691; NONEON-NOSVE-NEXT:    ldr h0, [sp, #8]
3692; NONEON-NOSVE-NEXT:    fmov w8, s0
3693; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3694; NONEON-NOSVE-NEXT:    fmov s0, w8
3695; NONEON-NOSVE-NEXT:    str h0, [sp, #40]
3696; NONEON-NOSVE-NEXT:    ldr h0, [sp, #6]
3697; NONEON-NOSVE-NEXT:    fmov w8, s0
3698; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3699; NONEON-NOSVE-NEXT:    fmov s0, w8
3700; NONEON-NOSVE-NEXT:    str h0, [sp, #38]
3701; NONEON-NOSVE-NEXT:    ldr h0, [sp, #4]
3702; NONEON-NOSVE-NEXT:    fmov w8, s0
3703; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3704; NONEON-NOSVE-NEXT:    fmov s0, w8
3705; NONEON-NOSVE-NEXT:    str h0, [sp, #36]
3706; NONEON-NOSVE-NEXT:    ldr h0, [sp, #2]
3707; NONEON-NOSVE-NEXT:    fmov w8, s0
3708; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3709; NONEON-NOSVE-NEXT:    fmov s0, w8
3710; NONEON-NOSVE-NEXT:    str h0, [sp, #34]
3711; NONEON-NOSVE-NEXT:    ldr h0, [sp]
3712; NONEON-NOSVE-NEXT:    fmov w8, s0
3713; NONEON-NOSVE-NEXT:    and w8, w8, #0x7fff
3714; NONEON-NOSVE-NEXT:    fmov s0, w8
3715; NONEON-NOSVE-NEXT:    str h0, [sp, #32]
3716; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
3717; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
3718; NONEON-NOSVE-NEXT:    add sp, sp, #64
3719; NONEON-NOSVE-NEXT:    ret
3720  %op = load <16 x half>, ptr %a
3721  %res = call <16 x half> @llvm.fabs.v16f16(<16 x half> %op)
3722  store <16 x half> %res, ptr %a
3723  ret void
3724}
3725
; llvm.fabs on a <2 x float> argument, returned by value.
; The SVE/SME runs expect a single predicated fabs on .s lanes under a vl2
; predicate. The run with neither feature enabled expects d0 to be spilled and
; each of the two lanes handled by a scalar fabs on s registers, then the
; result reloaded into d0.
3726define <2 x float> @fabs_v2f32(<2 x float> %op) {
3727; CHECK-LABEL: fabs_v2f32:
3728; CHECK:       // %bb.0:
3729; CHECK-NEXT:    ptrue p0.s, vl2
3730; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
3731; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
3732; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
3733; CHECK-NEXT:    ret
3734;
3735; NONEON-NOSVE-LABEL: fabs_v2f32:
3736; NONEON-NOSVE:       // %bb.0:
3737; NONEON-NOSVE-NEXT:    str d0, [sp, #-16]!
3738; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
3739; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
3740; NONEON-NOSVE-NEXT:    fabs s1, s0
3741; NONEON-NOSVE-NEXT:    ldr s0, [sp]
3742; NONEON-NOSVE-NEXT:    fabs s0, s0
3743; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #8]
3744; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
3745; NONEON-NOSVE-NEXT:    add sp, sp, #16
3746; NONEON-NOSVE-NEXT:    ret
3747  %res = call <2 x float> @llvm.fabs.v2f32(<2 x float> %op)
3748  ret <2 x float> %res
3749}
3750
; llvm.fabs on a <4 x float> argument, returned by value.
; The SVE/SME runs expect a single predicated fabs on .s lanes under a vl4
; predicate. The run with neither feature enabled expects q0 to be spilled and
; each of the four lanes handled by a scalar fabs on s registers, then the
; result reloaded into q0.
3751define <4 x float> @fabs_v4f32(<4 x float> %op) {
3752; CHECK-LABEL: fabs_v4f32:
3753; CHECK:       // %bb.0:
3754; CHECK-NEXT:    ptrue p0.s, vl4
3755; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
3756; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
3757; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3758; CHECK-NEXT:    ret
3759;
3760; NONEON-NOSVE-LABEL: fabs_v4f32:
3761; NONEON-NOSVE:       // %bb.0:
3762; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
3763; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
3764; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
3765; NONEON-NOSVE-NEXT:    fabs s1, s0
3766; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
3767; NONEON-NOSVE-NEXT:    fabs s0, s0
3768; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #24]
3769; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
3770; NONEON-NOSVE-NEXT:    fabs s1, s0
3771; NONEON-NOSVE-NEXT:    ldr s0, [sp]
3772; NONEON-NOSVE-NEXT:    fabs s0, s0
3773; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #16]
3774; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
3775; NONEON-NOSVE-NEXT:    add sp, sp, #32
3776; NONEON-NOSVE-NEXT:    ret
3777  %res = call <4 x float> @llvm.fabs.v4f32(<4 x float> %op)
3778  ret <4 x float> %res
3779}
3780
; llvm.fabs on an <8 x float> vector loaded from %a; the result is stored back
; to %a.
; The SVE/SME runs expect the two 128-bit halves to be handled by two
; predicated fabs instructions on .s lanes under a vl4 predicate. The run with
; neither feature enabled expects both q registers to be spilled and each of
; the eight lanes handled by a scalar fabs on s registers.
3781define void @fabs_v8f32(ptr %a) {
3782; CHECK-LABEL: fabs_v8f32:
3783; CHECK:       // %bb.0:
3784; CHECK-NEXT:    ldp q0, q1, [x0]
3785; CHECK-NEXT:    ptrue p0.s, vl4
3786; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
3787; CHECK-NEXT:    fabs z1.s, p0/m, z1.s
3788; CHECK-NEXT:    stp q0, q1, [x0]
3789; CHECK-NEXT:    ret
3790;
3791; NONEON-NOSVE-LABEL: fabs_v8f32:
3792; NONEON-NOSVE:       // %bb.0:
3793; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
3794; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
3795; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
3796; NONEON-NOSVE-NEXT:    ldr s0, [sp, #28]
3797; NONEON-NOSVE-NEXT:    fabs s1, s0
3798; NONEON-NOSVE-NEXT:    ldr s0, [sp, #24]
3799; NONEON-NOSVE-NEXT:    fabs s0, s0
3800; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #56]
3801; NONEON-NOSVE-NEXT:    ldr s0, [sp, #20]
3802; NONEON-NOSVE-NEXT:    fabs s1, s0
3803; NONEON-NOSVE-NEXT:    ldr s0, [sp, #16]
3804; NONEON-NOSVE-NEXT:    fabs s0, s0
3805; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #48]
3806; NONEON-NOSVE-NEXT:    ldr s0, [sp, #12]
3807; NONEON-NOSVE-NEXT:    fabs s1, s0
3808; NONEON-NOSVE-NEXT:    ldr s0, [sp, #8]
3809; NONEON-NOSVE-NEXT:    fabs s0, s0
3810; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #40]
3811; NONEON-NOSVE-NEXT:    ldr s0, [sp, #4]
3812; NONEON-NOSVE-NEXT:    fabs s1, s0
3813; NONEON-NOSVE-NEXT:    ldr s0, [sp]
3814; NONEON-NOSVE-NEXT:    fabs s0, s0
3815; NONEON-NOSVE-NEXT:    stp s0, s1, [sp, #32]
3816; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
3817; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
3818; NONEON-NOSVE-NEXT:    add sp, sp, #64
3819; NONEON-NOSVE-NEXT:    ret
3820  %op = load <8 x float>, ptr %a
3821  %res = call <8 x float> @llvm.fabs.v8f32(<8 x float> %op)
3822  store <8 x float> %res, ptr %a
3823  ret void
3824}
3825
; llvm.fabs on a <2 x double> argument, returned by value.
; The SVE/SME runs expect a single predicated fabs on .d lanes under a vl2
; predicate. The run with neither feature enabled expects q0 to be spilled and
; each of the two lanes handled by a scalar fabs on d registers, then the
; result reloaded into q0.
3826define <2 x double> @fabs_v2f64(<2 x double> %op) {
3827; CHECK-LABEL: fabs_v2f64:
3828; CHECK:       // %bb.0:
3829; CHECK-NEXT:    ptrue p0.d, vl2
3830; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
3831; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
3832; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3833; CHECK-NEXT:    ret
3834;
3835; NONEON-NOSVE-LABEL: fabs_v2f64:
3836; NONEON-NOSVE:       // %bb.0:
3837; NONEON-NOSVE-NEXT:    str q0, [sp, #-32]!
3838; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
3839; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
3840; NONEON-NOSVE-NEXT:    fabs d1, d0
3841; NONEON-NOSVE-NEXT:    ldr d0, [sp]
3842; NONEON-NOSVE-NEXT:    fabs d0, d0
3843; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #16]
3844; NONEON-NOSVE-NEXT:    ldr q0, [sp, #16]
3845; NONEON-NOSVE-NEXT:    add sp, sp, #32
3846; NONEON-NOSVE-NEXT:    ret
3847  %res = call <2 x double> @llvm.fabs.v2f64(<2 x double> %op)
3848  ret <2 x double> %res
3849}
3850
; llvm.fabs on a <4 x double> vector loaded from %a; the result is stored back
; to %a.
; The SVE/SME runs expect the two 128-bit halves to be handled by two
; predicated fabs instructions on .d lanes under a vl2 predicate. The run with
; neither feature enabled expects both q registers to be spilled and each of
; the four lanes handled by a scalar fabs on d registers.
3851define void @fabs_v4f64(ptr %a) {
3852; CHECK-LABEL: fabs_v4f64:
3853; CHECK:       // %bb.0:
3854; CHECK-NEXT:    ldp q0, q1, [x0]
3855; CHECK-NEXT:    ptrue p0.d, vl2
3856; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
3857; CHECK-NEXT:    fabs z1.d, p0/m, z1.d
3858; CHECK-NEXT:    stp q0, q1, [x0]
3859; CHECK-NEXT:    ret
3860;
3861; NONEON-NOSVE-LABEL: fabs_v4f64:
3862; NONEON-NOSVE:       // %bb.0:
3863; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
3864; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-64]!
3865; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
3866; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
3867; NONEON-NOSVE-NEXT:    fabs d1, d0
3868; NONEON-NOSVE-NEXT:    ldr d0, [sp, #16]
3869; NONEON-NOSVE-NEXT:    fabs d0, d0
3870; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
3871; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
3872; NONEON-NOSVE-NEXT:    fabs d1, d0
3873; NONEON-NOSVE-NEXT:    ldr d0, [sp]
3874; NONEON-NOSVE-NEXT:    fabs d0, d0
3875; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
3876; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #32]
3877; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
3878; NONEON-NOSVE-NEXT:    add sp, sp, #64
3879; NONEON-NOSVE-NEXT:    ret
3880  %op = load <4 x double>, ptr %a
3881  %res = call <4 x double> @llvm.fabs.v4f64(<4 x double> %op)
3882  store <4 x double> %res, ptr %a
3883  ret void
3884}
3885
; Declarations of the llvm.fma intrinsic, one per fixed-length half, float and
; double vector type (three operands of the same type each).
; NOTE(review): no caller is visible in this part of the file; presumably used
; by FMA tests earlier in the file -- confirm.
3886declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
3887declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
3888declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
3889declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
3890declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
3891declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
3892declare <8 x float> @llvm.fma.v8f32(<8 x float>, <8 x float>, <8 x float>)
3893declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
3894declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>)
3895
; Declarations of the llvm.sqrt intrinsic for the same set of vector types.
; NOTE(review): no caller is visible in this part of the file; presumably used
; by FSQRT tests earlier in the file -- confirm.
3896declare <2 x half> @llvm.sqrt.v2f16(<2 x half>)
3897declare <4 x half> @llvm.sqrt.v4f16(<4 x half>)
3898declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
3899declare <16 x half> @llvm.sqrt.v16f16(<16 x half>)
3900declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
3901declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
3902declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
3903declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
3904declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
3905
; Declarations of the llvm.fabs intrinsic for the same set of vector types;
; these are the callees of the fabs_* test functions in this file.
3906declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
3907declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
3908declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
3909declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
3910declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
3911declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
3912declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
3913declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
3914declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
3915