; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s

;
; CADD
;
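; Complex integer add with rotate: the trailing immediate selects the rotation
; applied to the second source operand (#90 or #270).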

define <vscale x 16 x i8> @cadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: cadd_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: cadd_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: cadd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: cadd_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  i32 270)
  ret <vscale x 2 x i64> %out
}

;
; SQCADD
;
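; Saturating complex integer add with rotate; the rotation immediate is encoded
; as for CADD.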

define <vscale x 16 x i8> @sqcadd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: sqcadd_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.b, z0.b, z1.b, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b,
                                                                    i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqcadd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqcadd_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.h, z0.h, z1.h, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b,
                                                                    i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqcadd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqcadd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.s, z0.s, z1.s, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b,
                                                                    i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqcadd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqcadd_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqcadd z0.d, z0.d, z1.d, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b,
                                                                    i32 270)
  ret <vscale x 2 x i64> %out
}

;
; CMLA
;
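; Complex integer multiply-add with rotate: the trailing immediate selects a
; rotation of #0, #90, #180 or #270 for the multiplication.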

define <vscale x 16 x i8> @cmla_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: cmla_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.b, z1.b, z2.b, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b,
                                                                  <vscale x 16 x i8> %c,
                                                                  i32 90)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @cmla_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.h, z1.h, z2.h, #180
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c,
                                                                  i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.s, z1.s, z2.s, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c,
                                                                  i32 270)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @cmla_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: cmla_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.d, z1.d, z2.d, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c,
                                                                  i32 0)
  ret <vscale x 2 x i64> %out
}

;
; CMLA_LANE
;
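; Indexed CMLA: the first immediate selects the complex-number (real/imaginary)
; pair within each 128-bit segment of the second multiplicand; the second
; immediate selects the rotation.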

define <vscale x 8 x i16> @cmla_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: cmla_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.h, z1.h, z2.h[1], #180
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       <vscale x 8 x i16> %c,
                                                                       i32 1,
                                                                       i32 180)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @cmla_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: cmla_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmla z0.s, z1.s, z2.s[0], #270
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       <vscale x 4 x i32> %c,
                                                                       i32 0,
                                                                       i32 270)
  ret <vscale x 4 x i32> %out
}

;
; SQRDCMLAH
;
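; Saturating rounding doubling complex integer multiply-add high with rotate.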

define <vscale x 16 x i8> @sqrdcmlah_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
; CHECK-LABEL: sqrdcmlah_b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.b, z1.b, z2.b, #0
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8> %a,
                                                                       <vscale x 16 x i8> %b,
                                                                       <vscale x 16 x i8> %c,
                                                                       i32 0)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @sqrdcmlah_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.h, z1.h, z2.h, #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                       <vscale x 8 x i16> %b,
                                                                       <vscale x 8 x i16> %c,
                                                                       i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.s, z1.s, z2.s, #180
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                       <vscale x 4 x i32> %b,
                                                                       <vscale x 4 x i32> %c,
                                                                       i32 180)
  ret <vscale x 4 x i32> %out
}

define <vscale x 2 x i64> @sqrdcmlah_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: sqrdcmlah_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.d, z1.d, z2.d, #270
; CHECK-NEXT:    ret
  %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64> %a,
                                                                       <vscale x 2 x i64> %b,
                                                                       <vscale x 2 x i64> %c,
                                                                       i32 270)
  ret <vscale x 2 x i64> %out
}

;
; SQRDCMLAH_LANE
;
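; Indexed SQRDCMLAH; the immediates encode the pair index and rotation, as for
; the indexed CMLA above.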

define <vscale x 8 x i16> @sqrdcmlah_lane_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: sqrdcmlah_lane_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.h, z1.h, z2.h[1], #90
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16> %a,
                                                                            <vscale x 8 x i16> %b,
                                                                            <vscale x 8 x i16> %c,
                                                                            i32 1,
                                                                            i32 90)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @sqrdcmlah_lane_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: sqrdcmlah_lane_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqrdcmlah z0.s, z1.s, z2.s[0], #180
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32> %a,
                                                                            <vscale x 4 x i32> %b,
                                                                            <vscale x 4 x i32> %c,
                                                                            i32 0,
                                                                            i32 180)
  ret <vscale x 4 x i32> %out
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.cadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcadd.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcadd.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqcadd.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqcadd.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.cmla.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.cmla.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.cmla.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.cmla.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)

declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrdcmlah.x.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdcmlah.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)

declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdcmlah.lane.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32, i32)