xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fp.ll (revision 1ee315ae7964c8433b772e0b5d667834994ba753)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3
; fadd on nxv8f16: expected output is a single unpredicated FADD on .h elements.
define <vscale x 8 x half> @fadd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fadd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fadd <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}
12
; fadd on nxv4f16: expected output is a predicated FADD on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fadd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fadd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fadd <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}
22
; fadd on nxv2f16: expected output is a predicated FADD on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fadd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fadd_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fadd <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}
32
; fadd on nxv4f32: expected output is a single unpredicated FADD on .s elements.
define <vscale x 4 x float> @fadd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fadd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fadd <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}
41
; fadd on nxv2f32: expected output is a predicated FADD on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fadd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fadd_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fadd <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}
51
; fadd on nxv2f64: expected output is a single unpredicated FADD on .d elements.
define <vscale x 2 x double> @fadd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fadd_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fadd <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}
60
; fdiv on nxv8f16: expected output is a predicated FDIV on .h elements
; governed by ptrue p0.h.
define <vscale x 8 x half> @fdiv_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fdiv_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}
70
; fdiv on nxv4f16: expected output is a predicated FDIV on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fdiv_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fdiv_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}
80
; fdiv on nxv2f16: expected output is a predicated FDIV on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fdiv_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fdiv_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fdiv z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}
90
; fdiv on nxv4f32: expected output is a predicated FDIV on .s elements
; governed by ptrue p0.s.
define <vscale x 4 x float> @fdiv_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fdiv_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}
100
; fdiv on nxv2f32: expected output is a predicated FDIV on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fdiv_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fdiv_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fdiv z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}
110
; fdiv on nxv2f64: expected output is a predicated FDIV on .d elements
; governed by ptrue p0.d.
define <vscale x 2 x double> @fdiv_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fdiv_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fdiv z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fdiv <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}
120
; fsub on nxv8f16: expected output is a single unpredicated FSUB on .h elements.
define <vscale x 8 x half> @fsub_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fsub <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}
129
; fsub on nxv4f16: expected output is a predicated FSUB on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fsub_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fsub_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fsub <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}
139
; fsub on nxv2f16: expected output is a predicated FSUB on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fsub_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fsub_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fsub <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}
149
; fsub on nxv4f32: expected output is a single unpredicated FSUB on .s elements.
define <vscale x 4 x float> @fsub_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fsub <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}
158
; fsub on nxv2f32: expected output is a predicated FSUB on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fsub_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fsub_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fsub <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}
168
; fsub on nxv2f64: expected output is a single unpredicated FSUB on .d elements.
define <vscale x 2 x double> @fsub_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fsub <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}
177
; fmul on nxv8f16: expected output is a single unpredicated FMUL on .h elements.
define <vscale x 8 x half> @fmul_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fmul <vscale x 8 x half> %a, %b
  ret <vscale x 8 x half> %res
}
186
; fmul on nxv4f16: expected output is a predicated FMUL on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fmul_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fmul_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fmul <vscale x 4 x half> %a, %b
  ret <vscale x 4 x half> %res
}
196
; fmul on nxv2f16: expected output is a predicated FMUL on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fmul_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fmul_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmul z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = fmul <vscale x 2 x half> %a, %b
  ret <vscale x 2 x half> %res
}
206
; fmul on nxv4f32: expected output is a single unpredicated FMUL on .s elements.
define <vscale x 4 x float> @fmul_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fmul <vscale x 4 x float> %a, %b
  ret <vscale x 4 x float> %res
}
215
; fmul on nxv2f32: expected output is a predicated FMUL on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fmul_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fmul_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmul z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fmul <vscale x 2 x float> %a, %b
  ret <vscale x 2 x float> %res
}
225
; fmul on nxv2f64: expected output is a single unpredicated FMUL on .d elements.
define <vscale x 2 x double> @fmul_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = fmul <vscale x 2 x double> %a, %b
  ret <vscale x 2 x double> %res
}
234
; llvm.fma(%a, %b, %c) on nxv8f16: expected output is a predicated FMAD on
; .h elements governed by ptrue p0.h.
define <vscale x 8 x half> @fma_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fma_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
  ret <vscale x 8 x half> %r
}
244
; llvm.fma(%a, %b, %c) on nxv4f16: expected output is a predicated FMAD on
; .h elements governed by ptrue p0.s.
define <vscale x 4 x half> @fma_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
; CHECK-LABEL: fma_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c)
  ret <vscale x 4 x half> %r
}
254
; llvm.fma(%a, %b, %c) on nxv2f16: expected output is a predicated FMAD on
; .h elements governed by ptrue p0.d.
define <vscale x 2 x half> @fma_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
; CHECK-LABEL: fma_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c)
  ret <vscale x 2 x half> %r
}
264
; llvm.fma(%a, %b, %c) on nxv4f32: expected output is a predicated FMAD on
; .s elements governed by ptrue p0.s.
define <vscale x 4 x float> @fma_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fma_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
  ret <vscale x 4 x float> %r
}
274
; llvm.fma(%a, %b, %c) on nxv2f32: expected output is a predicated FMAD on
; .s elements governed by ptrue p0.d.
define <vscale x 2 x float> @fma_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
; CHECK-LABEL: fma_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c)
  ret <vscale x 2 x float> %r
}
284
; llvm.fma(%a, %b, %c) on nxv2f64: expected output is a predicated FMAD on
; .d elements governed by ptrue p0.d.
define <vscale x 2 x double> @fma_nxv2f64_1(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
  ret <vscale x 2 x double> %r
}
294
; Same as fma_nxv2f64_1 but with the two multiplicands swapped in the call,
; llvm.fma(%b, %a, %c); the expected output is the same predicated FMAD.
define <vscale x 2 x double> @fma_nxv2f64_2(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %b, <vscale x 2 x double> %a, <vscale x 2 x double> %c)
  ret <vscale x 2 x double> %r
}
304
; llvm.fma(%c, %b, %a): the addend is the first argument (%a), so the expected
; output is a predicated FMLA accumulating into z0 rather than an FMAD.
define <vscale x 2 x double> @fma_nxv2f64_3(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fma_nxv2f64_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmla z0.d, p0/m, z2.d, z1.d
; CHECK-NEXT:    ret
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %r
}
314
; llvm.fma(%c, -%b, %a) on nxv8f16: the fneg plus fma pair is expected to fold
; into a single predicated FMLS on .h elements governed by ptrue p0.h.
define <vscale x 8 x half> @fmls_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
; CHECK-LABEL: fmls_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 8 x half> %b
  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x half> %neg, <vscale x 8 x half> %a)
  ret <vscale x 8 x half> %r
}
325
; llvm.fma(%c, -%b, %a) on nxv4f16: the fneg plus fma pair is expected to fold
; into a single predicated FMLS on .h elements governed by ptrue p0.s.
define <vscale x 4 x half> @fmls_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
; CHECK-LABEL: fmls_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 4 x half> %b
  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x half> %neg, <vscale x 4 x half> %a)
  ret <vscale x 4 x half> %r
}
336
; llvm.fma(%c, -%b, %a) on nxv2f16: the fneg plus fma pair is expected to fold
; into a single predicated FMLS on .h elements governed by ptrue p0.d.
define <vscale x 2 x half> @fmls_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
; CHECK-LABEL: fmls_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 2 x half> %b
  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x half> %neg, <vscale x 2 x half> %a)
  ret <vscale x 2 x half> %r
}
347
; llvm.fma(%c, -%b, %a) on nxv4f32: the fneg plus fma pair is expected to fold
; into a single predicated FMLS on .s elements governed by ptrue p0.s.
define <vscale x 4 x float> @fmls_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
; CHECK-LABEL: fmls_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 4 x float> %b
  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x float> %neg, <vscale x 4 x float> %a)
  ret <vscale x 4 x float> %r
}
358
; llvm.fma(%c, -%b, %a) on nxv2f32: the fneg plus fma pair is expected to fold
; into a single predicated FMLS on .s elements governed by ptrue p0.d.
define <vscale x 2 x float> @fmls_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
; CHECK-LABEL: fmls_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 2 x float> %b
  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x float> %neg, <vscale x 2 x float> %a)
  ret <vscale x 2 x float> %r
}
369
; llvm.fma(%c, -%b, %a) on nxv2f64: the fneg plus fma pair is expected to fold
; into a single predicated FMLS on .d elements governed by ptrue p0.d.
define <vscale x 2 x double> @fmls_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
; CHECK-LABEL: fmls_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %neg = fneg <vscale x 2 x double> %b
  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x double> %neg, <vscale x 2 x double> %a)
  ret <vscale x 2 x double> %r
}
380
; fneg on nxv8f16: expected output is a predicated FNEG on .h elements
; governed by ptrue p0.h.
define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fneg_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = fneg <vscale x 8 x half> %a
  ret <vscale x 8 x half> %res
}
390
; fneg on nxv4f16: expected output is a predicated FNEG on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fneg_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fneg_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = fneg <vscale x 4 x half> %a
  ret <vscale x 4 x half> %res
}
400
; fneg on nxv2f16: expected output is a predicated FNEG on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fneg_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fneg_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fneg z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = fneg <vscale x 2 x half> %a
  ret <vscale x 2 x half> %res
}
410
; fneg on nxv4f32: expected output is a predicated FNEG on .s elements
; governed by ptrue p0.s.
define <vscale x 4 x float> @fneg_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fneg_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = fneg <vscale x 4 x float> %a
  ret <vscale x 4 x float> %res
}
420
; fneg on nxv2f32: expected output is a predicated FNEG on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fneg_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fneg_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fneg z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = fneg <vscale x 2 x float> %a
  ret <vscale x 2 x float> %res
}
430
; fneg on nxv2f64: expected output is a predicated FNEG on .d elements
; governed by ptrue p0.d.
define <vscale x 2 x double> @fneg_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fneg_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fneg z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %res = fneg <vscale x 2 x double> %a
  ret <vscale x 2 x double> %res
}
440
; llvm.aarch64.sve.frecps.x on nxv8f16: expected output is an unpredicated
; FRECPS on .h elements.
define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: frecps_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frecps z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}
449
; llvm.aarch64.sve.frecps.x on nxv4f32: expected output is an unpredicated
; FRECPS on .s elements.
define <vscale x 4 x float> @frecps_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: frecps_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frecps z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}
458
; llvm.aarch64.sve.frecps.x on nxv2f64: expected output is an unpredicated
; FRECPS on .d elements.
define <vscale x 2 x double> @frecps_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: frecps_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frecps z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %res
}
467
; llvm.aarch64.sve.frsqrts.x on nxv8f16: expected output is an unpredicated
; FRSQRTS on .h elements.
define <vscale x 8 x half> @frsqrts_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: frsqrts_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrts z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}
476
; llvm.aarch64.sve.frsqrts.x on nxv4f32: expected output is an unpredicated
; FRSQRTS on .s elements.
define <vscale x 4 x float> @frsqrts_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: frsqrts_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrts z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}
485
; llvm.aarch64.sve.frsqrts.x on nxv2f64: expected output is an unpredicated
; FRSQRTS on .d elements.
define <vscale x 2 x double> @frsqrts_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: frsqrts_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrts z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %res
}
494
; Struct wrapping a pair of doubles; scalar_to_vector indexes its second
; double through a getelementptr.
%complex = type { { double, double } }
496
; Two FADDV reductions are stored to the two adjacent doubles of a %complex.
; The expected output combines both scalars into one q register (lane insert)
; and writes them with a single 128-bit store.
define void @scalar_to_vector(ptr %outval, <vscale x 2 x i1> %pred, <vscale x 2 x double> %in1, <vscale x 2 x double> %in2) {
; CHECK-LABEL: scalar_to_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    faddv d0, p0, z0.d
; CHECK-NEXT:    faddv d1, p0, z1.d
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1
  %1 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %in1)
  %2 = call double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1> %pred, <vscale x 2 x double> %in2)
  store double %1, ptr %outval, align 8
  store double %2, ptr %imagp, align 8
  ret void
}
512
; Loads an nxv4f32 from %P1 and stores it to %P2; expected output is an
; LD1W/ST1W pair on .s elements governed by ptrue p0.s.
define void @float_copy(ptr %P1, ptr %P2) {
; CHECK-LABEL: float_copy:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %A = load <vscale x 4 x float>, ptr %P1, align 16
  store <vscale x 4 x float> %A, ptr %P2, align 16
  ret void
}
524
525; FSQRT
526
; llvm.sqrt on nxv8f16: expected output is a predicated FSQRT on .h elements
; governed by ptrue p0.h.
define <vscale x 8 x half> @fsqrt_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fsqrt_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.sqrt.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}
536
; llvm.sqrt on nxv4f16: expected output is a predicated FSQRT on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fsqrt_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fsqrt_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.sqrt.nxv4f16(<vscale x 4 x half> %a)
  ret <vscale x 4 x half> %res
}
546
; llvm.sqrt on nxv2f16: expected output is a predicated FSQRT on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fsqrt_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fsqrt_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsqrt z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.sqrt.nxv2f16(<vscale x 2 x half> %a)
  ret <vscale x 2 x half> %res
}
556
; llvm.sqrt on nxv4f32: expected output is a predicated FSQRT on .s elements
; governed by ptrue p0.s.
define <vscale x 4 x float> @fsqrt_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fsqrt_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}
566
; llvm.sqrt on nxv2f32: expected output is a predicated FSQRT on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fsqrt_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fsqrt_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsqrt z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float> %a)
  ret <vscale x 2 x float> %res
}
576
; llvm.sqrt on nxv2f64: expected output is a predicated FSQRT on .d elements
; governed by ptrue p0.d.
define <vscale x 2 x double> @fsqrt_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fsqrt_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fsqrt z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}
586
587; FABS
588
; llvm.fabs on nxv8f16: expected output is a predicated FABS on .h elements
; governed by ptrue p0.h.
define <vscale x 8 x half> @fabs_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fabs_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %a)
  ret <vscale x 8 x half> %res
}
598
; llvm.fabs on nxv4f16: expected output is a predicated FABS on .h elements
; governed by ptrue p0.s.
define <vscale x 4 x half> @fabs_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fabs_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %a)
  ret <vscale x 4 x half> %res
}
608
; llvm.fabs on nxv2f16: expected output is a predicated FABS on .h elements
; governed by ptrue p0.d.
define <vscale x 2 x half> @fabs_nxv2f16(<vscale x 2 x half> %a) {
; CHECK-LABEL: fabs_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabs z0.h, p0/m, z0.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %a)
  ret <vscale x 2 x half> %res
}
618
; llvm.fabs on nxv4f32: expected output is a predicated FABS on .s elements
; governed by ptrue p0.s.
define <vscale x 4 x float> @fabs_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fabs_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %a)
  ret <vscale x 4 x float> %res
}
628
; llvm.fabs on nxv2f32: expected output is a predicated FABS on .s elements
; governed by ptrue p0.d.
define <vscale x 2 x float> @fabs_nxv2f32(<vscale x 2 x float> %a) {
; CHECK-LABEL: fabs_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabs z0.s, p0/m, z0.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %a)
  ret <vscale x 2 x float> %res
}
638
; llvm.fabs on nxv2f64: expected output is a predicated FABS on .d elements
; governed by ptrue p0.d.
define <vscale x 2 x double> @fabs_nxv2f64(<vscale x 2 x double> %a) {
; CHECK-LABEL: fabs_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabs z0.d, p0/m, z0.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %a)
  ret <vscale x 2 x double> %res
}
648
649; FABD
650
; fabs(fsub(%a, %b)) on nxv8f16: the pair is expected to fold into a single
; predicated FABD on .h elements governed by ptrue p0.h.
define <vscale x 8 x half> @fabd_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fabd_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 8 x half> %a, %b
  %res = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %sub)
  ret <vscale x 8 x half> %res
}
661
; fabs(fsub(%a, %b)) on nxv4f16: the pair is expected to fold into a single
; predicated FABD on .h elements governed by ptrue p0.s.
define <vscale x 4 x half> @fabd_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: fabd_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 4 x half> %a, %b
  %res = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %sub)
  ret <vscale x 4 x half> %res
}
672
; fabs(fsub(%a, %b)) on nxv2f16: the pair is expected to fold into a single
; predicated FABD on .h elements governed by ptrue p0.d.
define <vscale x 2 x half> @fabd_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: fabd_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 2 x half> %a, %b
  %res = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %sub)
  ret <vscale x 2 x half> %res
}
683
; fabs(fsub(%a, %b)) on nxv4f32: the pair is expected to fold into a single
; predicated FABD on .s elements governed by ptrue p0.s.
define <vscale x 4 x float> @fabd_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fabd_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 4 x float> %a, %b
  %res = call <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float> %sub)
  ret <vscale x 4 x float> %res
}
694
; fabs(fsub(%a, %b)) on nxv2f32: the pair is expected to fold into a single
; predicated FABD on .s elements governed by ptrue p0.d.
define <vscale x 2 x float> @fabd_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: fabd_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 2 x float> %a, %b
  %res = call <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float> %sub)
  ret <vscale x 2 x float> %res
}
705
; fabs(fsub(%a, %b)) on nxv2f64: the pair is expected to fold into a single
; predicated FABD on .d elements governed by ptrue p0.d.
define <vscale x 2 x double> @fabd_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fabd_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fabd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %sub = fsub <vscale x 2 x double> %a, %b
  %res = call <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> %sub)
  ret <vscale x 2 x double> %res
}
716
717; maxnum minnum
718
; llvm.maxnum on nxv16f16: the expected output applies a predicated FMAXNM to
; two register pairs (z0/z2 and z1/z3) under one shared ptrue p0.h.
define <vscale x 16 x half> @maxnum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: maxnum_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT:    fmaxnm z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
  ret <vscale x 16 x half> %res
}
729
; llvm.maxnum on nxv8f16: expected output is a predicated FMAXNM on .h
; elements governed by ptrue p0.h.
define <vscale x 8 x half> @maxnum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: maxnum_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}
739
; llvm.maxnum on nxv4f16: expected output is a predicated FMAXNM on .h
; elements governed by ptrue p0.s.
define <vscale x 4 x half> @maxnum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: maxnum_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %res
}
749
; llvm.maxnum on nxv2f16: expected output is a predicated FMAXNM on .h
; elements governed by ptrue p0.d.
define <vscale x 2 x half> @maxnum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
; CHECK-LABEL: maxnum_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
  ret <vscale x 2 x half> %res
}
759
; llvm.maxnum on nxv8f32: the expected output applies a predicated FMAXNM to
; two register pairs (z0/z2 and z1/z3) under one shared ptrue p0.s.
define <vscale x 8 x float> @maxnum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: maxnum_nxv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT:    fmaxnm z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
  ret <vscale x 8 x float> %res
}
770
; llvm.maxnum on nxv4f32: expected output is a predicated FMAXNM on .s
; elements governed by ptrue p0.s.
define <vscale x 4 x float> @maxnum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: maxnum_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}
780
; llvm.maxnum on nxv2f32: expected output is a predicated FMAXNM on .s
; elements governed by ptrue p0.d.
define <vscale x 2 x float> @maxnum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
; CHECK-LABEL: maxnum_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
  ret <vscale x 2 x float> %res
}
790
; llvm.maxnum on nxv4f64: the expected output applies a predicated FMAXNM to
; two register pairs (z0/z2 and z1/z3) under one shared ptrue p0.d.
define <vscale x 4 x double> @maxnum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: maxnum_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    fmaxnm z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x double> @llvm.maxnum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
  ret <vscale x 4 x double> %res
}
801
; llvm.maxnum on nxv2f64: expected output is a predicated FMAXNM on .d
; elements governed by ptrue p0.d.
define <vscale x 2 x double> @maxnum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: maxnum_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmaxnm z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %res
}
811
; llvm.minnum on nxv16f16: the expected output applies a predicated FMINNM to
; two register pairs (z0/z2 and z1/z3) under one shared ptrue p0.h.
define <vscale x 16 x half> @minnum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: minnum_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z2.h
; CHECK-NEXT:    fminnm z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
  ret <vscale x 16 x half> %res
}
822
; llvm.minnum on nxv8f16: expected output is a predicated FMINNM on .h
; elements governed by ptrue p0.h.
define <vscale x 8 x half> @minnum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: minnum_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %res
}
832
; llvm.minnum on nxv4f16: expected output is a predicated FMINNM on .h
; elements governed by ptrue p0.s.
define <vscale x 4 x half> @minnum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
; CHECK-LABEL: minnum_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
  ret <vscale x 4 x half> %res
}
842
; llvm.minnum on <vscale x 2 x half>. Expected assembly: the predicate is built
; with ptrue p0.d while the fminnm itself operates on .h lanes of z0 and z1.
843define <vscale x 2 x half> @minnum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
844; CHECK-LABEL: minnum_nxv2f16:
845; CHECK:       // %bb.0:
846; CHECK-NEXT:    ptrue p0.d
847; CHECK-NEXT:    fminnm z0.h, p0/m, z0.h, z1.h
848; CHECK-NEXT:    ret
849  %res = call <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
850  ret <vscale x 2 x half> %res
851}
852
; llvm.minnum on <vscale x 8 x float>. Expected assembly: one ptrue p0.s, then
; two predicated fminnm .s instructions pairing z0 with z2 and z1 with z3.
853define <vscale x 8 x float> @minnum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
854; CHECK-LABEL: minnum_nxv8f32:
855; CHECK:       // %bb.0:
856; CHECK-NEXT:    ptrue p0.s
857; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z2.s
858; CHECK-NEXT:    fminnm z1.s, p0/m, z1.s, z3.s
859; CHECK-NEXT:    ret
860  %res = call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
861  ret <vscale x 8 x float> %res
862}
863
; llvm.minnum on <vscale x 4 x float>. Expected assembly: ptrue p0.s followed by
; a single predicated fminnm on z0.s with z1.s as the second operand.
864define <vscale x 4 x float> @minnum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
865; CHECK-LABEL: minnum_nxv4f32:
866; CHECK:       // %bb.0:
867; CHECK-NEXT:    ptrue p0.s
868; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
869; CHECK-NEXT:    ret
870  %res = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
871  ret <vscale x 4 x float> %res
872}
873
; llvm.minnum on <vscale x 2 x float>. Expected assembly: the predicate is built
; with ptrue p0.d while the fminnm itself operates on .s lanes of z0 and z1.
874define <vscale x 2 x float> @minnum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
875; CHECK-LABEL: minnum_nxv2f32:
876; CHECK:       // %bb.0:
877; CHECK-NEXT:    ptrue p0.d
878; CHECK-NEXT:    fminnm z0.s, p0/m, z0.s, z1.s
879; CHECK-NEXT:    ret
880  %res = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
881  ret <vscale x 2 x float> %res
882}
883
; llvm.minnum on <vscale x 4 x double>. Expected assembly: one ptrue p0.d, then
; two predicated fminnm .d instructions pairing z0 with z2 and z1 with z3.
884define <vscale x 4 x double> @minnum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
885; CHECK-LABEL: minnum_nxv4f64:
886; CHECK:       // %bb.0:
887; CHECK-NEXT:    ptrue p0.d
888; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z2.d
889; CHECK-NEXT:    fminnm z1.d, p0/m, z1.d, z3.d
890; CHECK-NEXT:    ret
891  %res = call <vscale x 4 x double> @llvm.minnum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
892  ret <vscale x 4 x double> %res
893}
894
; llvm.minnum on <vscale x 2 x double>. Expected assembly: ptrue p0.d followed
; by a single predicated fminnm on z0.d with z1.d as the second operand.
895define <vscale x 2 x double> @minnum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
896; CHECK-LABEL: minnum_nxv2f64:
897; CHECK:       // %bb.0:
898; CHECK-NEXT:    ptrue p0.d
899; CHECK-NEXT:    fminnm z0.d, p0/m, z0.d, z1.d
900; CHECK-NEXT:    ret
901  %res = call <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
902  ret <vscale x 2 x double> %res
903}
904
905; maximum minimum
906
; llvm.maximum on <vscale x 16 x half>. Expected assembly: one ptrue p0.h, then
; two predicated fmax .h instructions pairing z0 with z2 and z1 with z3.
907define <vscale x 16 x half> @maximum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
908; CHECK-LABEL: maximum_nxv16f16:
909; CHECK:       // %bb.0:
910; CHECK-NEXT:    ptrue p0.h
911; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z2.h
912; CHECK-NEXT:    fmax z1.h, p0/m, z1.h, z3.h
913; CHECK-NEXT:    ret
914  %res = call <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
915  ret <vscale x 16 x half> %res
916}
917
; llvm.maximum on <vscale x 8 x half>. Expected assembly: ptrue p0.h followed by
; a single predicated fmax on z0.h with z1.h as the second operand.
918define <vscale x 8 x half> @maximum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
919; CHECK-LABEL: maximum_nxv8f16:
920; CHECK:       // %bb.0:
921; CHECK-NEXT:    ptrue p0.h
922; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
923; CHECK-NEXT:    ret
924  %res = call <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
925  ret <vscale x 8 x half> %res
926}
927
; llvm.maximum on <vscale x 4 x half>. Expected assembly: the predicate is built
; with ptrue p0.s while the fmax itself operates on .h lanes of z0 and z1.
928define <vscale x 4 x half> @maximum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
929; CHECK-LABEL: maximum_nxv4f16:
930; CHECK:       // %bb.0:
931; CHECK-NEXT:    ptrue p0.s
932; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
933; CHECK-NEXT:    ret
934  %res = call <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
935  ret <vscale x 4 x half> %res
936}
937
; llvm.maximum on <vscale x 2 x half>. Expected assembly: the predicate is built
; with ptrue p0.d while the fmax itself operates on .h lanes of z0 and z1.
938define <vscale x 2 x half> @maximum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
939; CHECK-LABEL: maximum_nxv2f16:
940; CHECK:       // %bb.0:
941; CHECK-NEXT:    ptrue p0.d
942; CHECK-NEXT:    fmax z0.h, p0/m, z0.h, z1.h
943; CHECK-NEXT:    ret
944  %res = call <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
945  ret <vscale x 2 x half> %res
946}
947
; llvm.maximum on <vscale x 8 x float>. Expected assembly: one ptrue p0.s, then
; two predicated fmax .s instructions pairing z0 with z2 and z1 with z3.
948define <vscale x 8 x float> @maximum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
949; CHECK-LABEL: maximum_nxv8f32:
950; CHECK:       // %bb.0:
951; CHECK-NEXT:    ptrue p0.s
952; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z2.s
953; CHECK-NEXT:    fmax z1.s, p0/m, z1.s, z3.s
954; CHECK-NEXT:    ret
955  %res = call <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
956  ret <vscale x 8 x float> %res
957}
958
; llvm.maximum on <vscale x 4 x float>. Expected assembly: ptrue p0.s followed
; by a single predicated fmax on z0.s with z1.s as the second operand.
959define <vscale x 4 x float> @maximum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
960; CHECK-LABEL: maximum_nxv4f32:
961; CHECK:       // %bb.0:
962; CHECK-NEXT:    ptrue p0.s
963; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
964; CHECK-NEXT:    ret
965  %res = call <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
966  ret <vscale x 4 x float> %res
967}
968
; llvm.maximum on <vscale x 2 x float>. Expected assembly: the predicate is
; built with ptrue p0.d while the fmax itself operates on .s lanes of z0 and z1.
969define <vscale x 2 x float> @maximum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
970; CHECK-LABEL: maximum_nxv2f32:
971; CHECK:       // %bb.0:
972; CHECK-NEXT:    ptrue p0.d
973; CHECK-NEXT:    fmax z0.s, p0/m, z0.s, z1.s
974; CHECK-NEXT:    ret
975  %res = call <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
976  ret <vscale x 2 x float> %res
977}
978
; llvm.maximum on <vscale x 4 x double>. Expected assembly: one ptrue p0.d, then
; two predicated fmax .d instructions pairing z0 with z2 and z1 with z3.
979define <vscale x 4 x double> @maximum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
980; CHECK-LABEL: maximum_nxv4f64:
981; CHECK:       // %bb.0:
982; CHECK-NEXT:    ptrue p0.d
983; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z2.d
984; CHECK-NEXT:    fmax z1.d, p0/m, z1.d, z3.d
985; CHECK-NEXT:    ret
986  %res = call <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
987  ret <vscale x 4 x double> %res
988}
989
; llvm.maximum on <vscale x 2 x double>. Expected assembly: ptrue p0.d followed
; by a single predicated fmax on z0.d with z1.d as the second operand.
990define <vscale x 2 x double> @maximum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
991; CHECK-LABEL: maximum_nxv2f64:
992; CHECK:       // %bb.0:
993; CHECK-NEXT:    ptrue p0.d
994; CHECK-NEXT:    fmax z0.d, p0/m, z0.d, z1.d
995; CHECK-NEXT:    ret
996  %res = call <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
997  ret <vscale x 2 x double> %res
998}
999
; llvm.minimum on <vscale x 16 x half>. Expected assembly: one ptrue p0.h, then
; two predicated fmin .h instructions pairing z0 with z2 and z1 with z3.
1000define <vscale x 16 x half> @minimum_nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
1001; CHECK-LABEL: minimum_nxv16f16:
1002; CHECK:       // %bb.0:
1003; CHECK-NEXT:    ptrue p0.h
1004; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z2.h
1005; CHECK-NEXT:    fmin z1.h, p0/m, z1.h, z3.h
1006; CHECK-NEXT:    ret
1007  %res = call <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b)
1008  ret <vscale x 16 x half> %res
1009}
1010
; llvm.minimum on <vscale x 8 x half>. Expected assembly: ptrue p0.h followed by
; a single predicated fmin on z0.h with z1.h as the second operand.
1011define <vscale x 8 x half> @minimum_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
1012; CHECK-LABEL: minimum_nxv8f16:
1013; CHECK:       // %bb.0:
1014; CHECK-NEXT:    ptrue p0.h
1015; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
1016; CHECK-NEXT:    ret
1017  %res = call <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b)
1018  ret <vscale x 8 x half> %res
1019}
1020
; llvm.minimum on <vscale x 4 x half>. Expected assembly: the predicate is built
; with ptrue p0.s while the fmin itself operates on .h lanes of z0 and z1.
1021define <vscale x 4 x half> @minimum_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b) {
1022; CHECK-LABEL: minimum_nxv4f16:
1023; CHECK:       // %bb.0:
1024; CHECK-NEXT:    ptrue p0.s
1025; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
1026; CHECK-NEXT:    ret
1027  %res = call <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b)
1028  ret <vscale x 4 x half> %res
1029}
1030
; llvm.minimum on <vscale x 2 x half>. Expected assembly: the predicate is built
; with ptrue p0.d while the fmin itself operates on .h lanes of z0 and z1.
1031define <vscale x 2 x half> @minimum_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
1032; CHECK-LABEL: minimum_nxv2f16:
1033; CHECK:       // %bb.0:
1034; CHECK-NEXT:    ptrue p0.d
1035; CHECK-NEXT:    fmin z0.h, p0/m, z0.h, z1.h
1036; CHECK-NEXT:    ret
1037  %res = call <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b)
1038  ret <vscale x 2 x half> %res
1039}
1040
; llvm.minimum on <vscale x 8 x float>. Expected assembly: one ptrue p0.s, then
; two predicated fmin .s instructions pairing z0 with z2 and z1 with z3.
1041define <vscale x 8 x float> @minimum_nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
1042; CHECK-LABEL: minimum_nxv8f32:
1043; CHECK:       // %bb.0:
1044; CHECK-NEXT:    ptrue p0.s
1045; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z2.s
1046; CHECK-NEXT:    fmin z1.s, p0/m, z1.s, z3.s
1047; CHECK-NEXT:    ret
1048  %res = call <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b)
1049  ret <vscale x 8 x float> %res
1050}
1051
; llvm.minimum on <vscale x 4 x float>. Expected assembly: ptrue p0.s followed
; by a single predicated fmin on z0.s with z1.s as the second operand.
1052define <vscale x 4 x float> @minimum_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
1053; CHECK-LABEL: minimum_nxv4f32:
1054; CHECK:       // %bb.0:
1055; CHECK-NEXT:    ptrue p0.s
1056; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
1057; CHECK-NEXT:    ret
1058  %res = call <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b)
1059  ret <vscale x 4 x float> %res
1060}
1061
; llvm.minimum on <vscale x 2 x float>. Expected assembly: the predicate is
; built with ptrue p0.d while the fmin itself operates on .s lanes of z0 and z1.
1062define <vscale x 2 x float> @minimum_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b) {
1063; CHECK-LABEL: minimum_nxv2f32:
1064; CHECK:       // %bb.0:
1065; CHECK-NEXT:    ptrue p0.d
1066; CHECK-NEXT:    fmin z0.s, p0/m, z0.s, z1.s
1067; CHECK-NEXT:    ret
1068  %res = call <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b)
1069  ret <vscale x 2 x float> %res
1070}
1071
; llvm.minimum on <vscale x 4 x double>. Expected assembly: one ptrue p0.d, then
; two predicated fmin .d instructions pairing z0 with z2 and z1 with z3.
1072define <vscale x 4 x double> @minimum_nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
1073; CHECK-LABEL: minimum_nxv4f64:
1074; CHECK:       // %bb.0:
1075; CHECK-NEXT:    ptrue p0.d
1076; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z2.d
1077; CHECK-NEXT:    fmin z1.d, p0/m, z1.d, z3.d
1078; CHECK-NEXT:    ret
1079  %res = call <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b)
1080  ret <vscale x 4 x double> %res
1081}
1082
; llvm.minimum on <vscale x 2 x double>. Expected assembly: ptrue p0.d followed
; by a single predicated fmin on z0.d with z1.d as the second operand.
1083define <vscale x 2 x double> @minimum_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
1084; CHECK-LABEL: minimum_nxv2f64:
1085; CHECK:       // %bb.0:
1086; CHECK-NEXT:    ptrue p0.d
1087; CHECK-NEXT:    fmin z0.d, p0/m, z0.d, z1.d
1088; CHECK-NEXT:    ret
1089  %res = call <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b)
1090  ret <vscale x 2 x double> %res
1091}
1092
; Declarations of the two-operand llvm.aarch64.sve.frecps.x intrinsic for the
; nxv8f16, nxv4f32 and nxv2f64 types. Their callers are outside this section.
1093declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
1094declare <vscale x 4 x float>  @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
1095declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
1096
; Declarations of the two-operand llvm.aarch64.sve.frsqrts.x intrinsic for the
; nxv8f16, nxv4f32 and nxv2f64 types. Their callers are outside this section.
1097declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
1098declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
1099declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
1100
; Declarations of the three-operand llvm.fma intrinsic for the nxv2f64, nxv4f32,
; nxv2f32, nxv8f16, nxv4f16 and nxv2f16 types. Callers are outside this section.
1101declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
1102declare <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
1103declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
1104declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
1105declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
1106declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
1107
; Declarations of the single-operand llvm.sqrt intrinsic for the nxv8f16,
; nxv4f16, nxv2f16, nxv4f32, nxv2f32 and nxv2f64 types. Callers are outside
; this section.
1108declare <vscale x 8 x half> @llvm.sqrt.nxv8f16( <vscale x 8 x half>)
1109declare <vscale x 4 x half> @llvm.sqrt.nxv4f16( <vscale x 4 x half>)
1110declare <vscale x 2 x half> @llvm.sqrt.nxv2f16( <vscale x 2 x half>)
1111declare <vscale x 4 x float> @llvm.sqrt.nxv4f32(<vscale x 4 x float>)
1112declare <vscale x 2 x float> @llvm.sqrt.nxv2f32(<vscale x 2 x float>)
1113declare <vscale x 2 x double> @llvm.sqrt.nxv2f64(<vscale x 2 x double>)
1114
; Declarations of the single-operand llvm.fabs intrinsic for the nxv8f16,
; nxv4f16, nxv2f16, nxv4f32, nxv2f32 and nxv2f64 types. Callers are outside
; this section.
1115declare <vscale x 8 x half> @llvm.fabs.nxv8f16( <vscale x 8 x half>)
1116declare <vscale x 4 x half> @llvm.fabs.nxv4f16( <vscale x 4 x half>)
1117declare <vscale x 2 x half> @llvm.fabs.nxv2f16( <vscale x 2 x half>)
1118declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
1119declare <vscale x 2 x float> @llvm.fabs.nxv2f32(<vscale x 2 x float>)
1120declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
1121
; Declarations of the two-operand llvm.maxnum and llvm.minnum intrinsics called
; by the maxnum_* and minnum_* test functions. Each intrinsic is declared for
; nxv16f16, nxv8f16, nxv4f16, nxv2f16, nxv8f32, nxv4f32, nxv2f32, nxv4f64 and
; nxv2f64.
1122declare <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
1123declare <vscale x 8 x half> @llvm.maxnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
1124declare <vscale x 4 x half> @llvm.maxnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
1125declare <vscale x 2 x half> @llvm.maxnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
1126declare <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
1127declare <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
1128declare <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
1129declare <vscale x 4 x double> @llvm.maxnum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
1130declare <vscale x 2 x double> @llvm.maxnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
1131declare <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
1132declare <vscale x 8 x half> @llvm.minnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
1133declare <vscale x 4 x half> @llvm.minnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
1134declare <vscale x 2 x half> @llvm.minnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
1135declare <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
1136declare <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
1137declare <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
1138declare <vscale x 4 x double> @llvm.minnum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
1139declare <vscale x 2 x double> @llvm.minnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
1140
; Declarations of the two-operand llvm.maximum and llvm.minimum intrinsics
; called by the maximum_* and minimum_* test functions. Each intrinsic is
; declared for nxv16f16, nxv8f16, nxv4f16, nxv2f16, nxv8f32, nxv4f32, nxv2f32,
; nxv4f64 and nxv2f64.
1141declare <vscale x 16 x half> @llvm.maximum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
1142declare <vscale x 8 x half> @llvm.maximum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
1143declare <vscale x 4 x half> @llvm.maximum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
1144declare <vscale x 2 x half> @llvm.maximum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
1145declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
1146declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
1147declare <vscale x 2 x float> @llvm.maximum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
1148declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
1149declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
1150declare <vscale x 16 x half> @llvm.minimum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
1151declare <vscale x 8 x half> @llvm.minimum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
1152declare <vscale x 4 x half> @llvm.minimum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
1153declare <vscale x 2 x half> @llvm.minimum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
1154declare <vscale x 8 x float> @llvm.minimum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
1155declare <vscale x 4 x float> @llvm.minimum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
1156declare <vscale x 2 x float> @llvm.minimum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
1157declare <vscale x 4 x double> @llvm.minimum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
1158declare <vscale x 2 x double> @llvm.minimum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
1159
; Declaration of llvm.aarch64.sve.faddv for nxv2f64: it takes an nxv2i1
; predicate and an nxv2f64 vector and returns a scalar double.
; NOTE(review): attribute group #2 is not defined in the visible part of the
; file - confirm it exists and matches the "Function Attrs" line below.
1160; Function Attrs: nounwind readnone
1161declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2
1162