; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
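
; This test checks that predicated SVE intrinsic calls whose governing
; predicate is an all-active ptrue (pattern 31, i.e. SV_ALL) are selected as
; the unpredicated instruction forms shown in the CHECK lines. Most cases use
; the ".u" intrinsic variants, whose results for inactive lanes are undefined,
; so with an all-active predicate the unpredicated encoding is a valid choice.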

;
; ADD
;

define <vscale x 16 x i8> @add_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: add_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @add_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: add_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @add_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: add_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @add_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: add_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SUB
;

define <vscale x 16 x i8> @sub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

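; For the *_ptrue_all_{b,h,d} tests in this file, an all-active predicate
; created at a narrower element width (.b or .h) still has every .s lane
; active after the svbool round-trip, so it is equivalent to ptrue.s and the
; unpredicated form remains usable. An all-active .d predicate only activates
; every other .s lane, which is why the "_d" variants must stay predicated.
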
; As sub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @sub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As sub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @sub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; MUL
;

define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: mul_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: mul_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: mul_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: mul_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SMULH
;

define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: smulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: smulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: smulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: smulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UMULH
;

define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: umulh_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: umulh_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: umulh_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As umulh_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                        <vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                        <vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As umulh_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @umulh_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: umulh_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; AND
;
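; Note: the unpredicated forms of the bitwise instructions below are
; element-size agnostic, so the expected output uses the .d variant for every
; element type.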

define <vscale x 16 x i8> @and_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: and_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @and_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: and_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @and_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: and_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @and_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: and_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; BIC
;

define <vscale x 16 x i8> @bic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: bic_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @bic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: bic_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @bic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: bic_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @bic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: bic_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    bic z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; EOR
;

define <vscale x 16 x i8> @eor_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: eor_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @eor_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: eor_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @eor_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: eor_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @eor_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: eor_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; ORR
;

define <vscale x 16 x i8> @orr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: orr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @orr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: orr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @orr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @orr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: orr_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As orr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @orr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As orr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @orr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: orr_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; SQADD
;
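; The saturating-add tests use the base intrinsics rather than the .u
; variants; since the predicate is all active, every lane is written and the
; unpredicated instruction remains a valid lowering. The same applies to
; UQADD further down.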

define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; SQSUB
;

define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQADD
;

define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqadd_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
                                                                 <vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqadd_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
                                                                 <vscale x 8 x i16> %a,
                                                                 <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqadd_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
                                                                 <vscale x 4 x i32> %a,
                                                                 <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqadd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %a,
                                                                 <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

;
; UQSUB
;

define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: uqsub_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.b, z0.b, z1.b
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> %pg,
                                                                   <vscale x 16 x i8> %a,
                                                                   <vscale x 16 x i8> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqsub_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg,
                                                                   <vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqsub_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> %pg,
                                                                   <vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %out
}

; As uqsub_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                        <vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                        <vscale x 4 x i32> %a,
                                                                        <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

; As uqsub_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
define <vscale x 4 x i32> @uqsub_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqsub_i32_ptrue_all_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                      <vscale x 4 x i32> %a,
                                                                      <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %out
}

;
; ASR (wide)
;
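; The "wide" shift intrinsics take a .d vector of shift amounts that is
; applied to the narrower elements, so only the .b, .h and .s element types
; are tested here.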

define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.b, z0.b, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.h, z0.h, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: asr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    asr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; LSL (wide)
;

define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.b, z0.b, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.h, z0.h, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsl_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

;
; LSR (wide)
;

define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.b, z0.b, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
                                                                    <vscale x 16 x i8> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.h, z0.h, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
                                                                    <vscale x 8 x i16> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x i32> %a,
                                                                    <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i8 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_b(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                         <vscale x 4 x i32> %a,
                                                                         <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i16 based and thus compatible for i32.
define <vscale x 4 x i32> @lsr_i32_ptrue_all_h(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: lsr_i32_ptrue_all_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsr z0.s, z0.s, z1.d
; CHECK-NEXT:    ret
  %pg.h = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg.h)
  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s,
                                                                         <vscale x 4 x i32> %a,
                                                                         <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; As lsr_i32 but where pg is i64 based, which is not compatible for i32 and
; thus inactive lanes are important and the unpredicated form cannot be used.
1015define <vscale x 4 x i32> @lsr_i32_ptrue_all_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
1016; CHECK-LABEL: lsr_i32_ptrue_all_d:
1017; CHECK:       // %bb.0:
1018; CHECK-NEXT:    ptrue p0.d
1019; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, z1.d
1020; CHECK-NEXT:    ret
1021  %pg.d = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1022  %pg.b = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg.d)
1023  %pg.s = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg.b)
1024  %out = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg.s,
1025                                                                         <vscale x 4 x i32> %a,
1026                                                                         <vscale x 2 x i64> %b)
1027  ret <vscale x 4 x i32> %out
1028}
1029
1030;
1031; FADD
1032;
1033
1034define <vscale x 8 x half> @fadd_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
1035; CHECK-LABEL: fadd_half:
1036; CHECK:       // %bb.0:
1037; CHECK-NEXT:    fadd z0.h, z0.h, z1.h
1038; CHECK-NEXT:    ret
1039  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
1040  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %pg,
1041                                                                   <vscale x 8 x half> %a,
1042                                                                   <vscale x 8 x half> %b)
1043  ret <vscale x 8 x half> %out
1044}
1045
1046define <vscale x 4 x float> @fadd_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
1047; CHECK-LABEL: fadd_float:
1048; CHECK:       // %bb.0:
1049; CHECK-NEXT:    fadd z0.s, z0.s, z1.s
1050; CHECK-NEXT:    ret
1051  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
1052  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %pg,
1053                                                                    <vscale x 4 x float> %a,
1054                                                                    <vscale x 4 x float> %b)
1055  ret <vscale x 4 x float> %out
1056}
1057
1058define <vscale x 2 x double> @fadd_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
1059; CHECK-LABEL: fadd_double:
1060; CHECK:       // %bb.0:
1061; CHECK-NEXT:    fadd z0.d, z0.d, z1.d
1062; CHECK-NEXT:    ret
1063  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
1064  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %pg,
1065                                                                     <vscale x 2 x double> %a,
1066                                                                     <vscale x 2 x double> %b)
1067  ret <vscale x 2 x double> %out
1068}
1069
1070;
1071; FSUB
1072;
1073
define <vscale x 8 x half> @fsub_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fsub_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fsub_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fsub_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fsub_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fsub_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fsub z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

;
; FMUL
;

define <vscale x 8 x half> @fmul_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: fmul_half:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.h, z0.h, z1.h
; CHECK-NEXT:    ret
  %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %pg,
                                                                   <vscale x 8 x half> %a,
                                                                   <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %out
}

define <vscale x 4 x float> @fmul_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: fmul_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %pg,
                                                                    <vscale x 4 x float> %a,
                                                                    <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %out
}

define <vscale x 2 x double> @fmul_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: fmul_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %pg,
                                                                     <vscale x 2 x double> %a,
                                                                     <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %out
}

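; Declarations for the SVE intrinsics used by the tests above.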
declare <vscale x 16 x  i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x  4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x  4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x  4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x  4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 16 x  i8>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x  8 x i16>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x  4 x i32>)
declare <vscale x  2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x  2 x i1>, <vscale x  2 x i64>, <vscale x  2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 2 x i64>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x 2 x i64>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 2 x i64>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x 2 x i64>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x 2 x i64>)

declare <vscale x 16 x  i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x  i8>, <vscale x 2 x i64>)
declare <vscale x  8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x  8 x i1>, <vscale x  8 x i16>, <vscale x 2 x i64>)
declare <vscale x  4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x  4 x i1>, <vscale x  4 x i32>, <vscale x 2 x i64>)

declare <vscale x 8 x   half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
declare <vscale x 4 x  float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x   half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
declare <vscale x 4 x  float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 8 x   half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x   half>, <vscale x 8 x   half>)
declare <vscale x 4 x  float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x  float>, <vscale x 4 x  float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)

declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1>)

declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)