; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s

; ADDHNB
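; Narrowing add, high half, bottom: writes the most significant half of each
; wide sum %a + %b to the even-numbered elements of the half-width result and
; zeroes the odd-numbered elements.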

define <vscale x 16 x i8> @addhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: addhnb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addhnb z0.b, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.addhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @addhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: addhnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addhnb z0.h, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.addhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @addhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: addhnb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addhnb z0.s, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.addhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; ADDHNT
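; Narrowing add, high half, top: writes the most significant half of each wide
; sum %b + %c to the odd-numbered elements of the half-width result, leaving
; the even-numbered elements of the first (narrow) operand %a unchanged, which
; is why z0 is both source and destination below.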

define <vscale x 16 x i8> @addhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: addhnt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addhnt z0.b, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.addhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @addhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: addhnt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addhnt z0.h, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.addhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @addhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: addhnt_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addhnt z0.s, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.addhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c)
  ret <vscale x 4 x i32> %out
}

; RADDHNB
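; Rounding variant of ADDHNB: a rounding constant of 1 << (esize/2 - 1), where
; esize is the wide element size in bits, is added to each sum before the high
; half is extracted.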

define <vscale x 16 x i8> @raddhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: raddhnb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    raddhnb z0.b, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.raddhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @raddhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: raddhnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    raddhnb z0.h, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.raddhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @raddhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: raddhnb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    raddhnb z0.s, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.raddhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; RADDHNT
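; Rounding variant of ADDHNT: the rounded high half of each wide sum %b + %c
; is interleaved into the odd-numbered elements of the narrow accumulator %a.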

define <vscale x 16 x i8> @raddhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: raddhnt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    raddhnt z0.b, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.raddhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @raddhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: raddhnt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    raddhnt z0.h, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.raddhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @raddhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: raddhnt_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    raddhnt z0.s, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.raddhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   <vscale x 2 x i64> %c)
  ret <vscale x 4 x i32> %out
}

; RSUBHNB
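; Rounding narrowing subtract, high half, bottom: the rounded high half of
; each wide difference %a - %b goes to the even-numbered elements of the
; result; the odd-numbered elements are zeroed.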

define <vscale x 16 x i8> @rsubhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: rsubhnb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rsubhnb z0.b, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rsubhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                   <vscale x 8 x i16> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @rsubhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: rsubhnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rsubhnb z0.h, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rsubhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                   <vscale x 4 x i32> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @rsubhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: rsubhnb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rsubhnb z0.s, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rsubhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                   <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; RSUBHNT
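; Rounding narrowing subtract, high half, top: the rounded high half of each
; wide difference %b - %c goes to the odd-numbered elements of the narrow
; accumulator %a.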

define <vscale x 16 x i8> @rsubhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: rsubhnt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rsubhnt z0.b, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.rsubhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                   <vscale x 8 x i16> %b,
                                                                   <vscale x 8 x i16> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @rsubhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: rsubhnt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rsubhnt z0.h, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.rsubhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                   <vscale x 4 x i32> %b,
                                                                   <vscale x 4 x i32> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @rsubhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: rsubhnt_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rsubhnt z0.s, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.rsubhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                   <vscale x 2 x i64> %b,
                                                                   <vscale x 2 x i64> %c)
  ret <vscale x 4 x i32> %out
}

; SUBHNB
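; Narrowing subtract, high half, bottom: as ADDHNB, but with the wide
; difference %a - %b and no rounding.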

define <vscale x 16 x i8> @subhnb_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: subhnb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subhnb z0.b, z0.h, z1.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subhnb.nxv8i16(<vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @subhnb_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: subhnb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subhnb z0.h, z0.s, z1.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subhnb.nxv4i32(<vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @subhnb_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: subhnb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subhnb z0.s, z0.d, z1.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subhnb.nxv2i64(<vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
  ret <vscale x 4 x i32> %out
}

; SUBHNT
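; Narrowing subtract, high half, top: as ADDHNT, but with the wide difference
; %b - %c.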

define <vscale x 16 x i8> @subhnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
; CHECK-LABEL: subhnt_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subhnt z0.b, z1.h, z2.h
; CHECK-NEXT:    ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.subhnt.nxv8i16(<vscale x 16 x i8> %a,
                                                                  <vscale x 8 x i16> %b,
                                                                  <vscale x 8 x i16> %c)
  ret <vscale x 16 x i8> %out
}

define <vscale x 8 x i16> @subhnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
; CHECK-LABEL: subhnt_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subhnt z0.h, z1.s, z2.s
; CHECK-NEXT:    ret
  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.subhnt.nxv4i32(<vscale x 8 x i16> %a,
                                                                  <vscale x 4 x i32> %b,
                                                                  <vscale x 4 x i32> %c)
  ret <vscale x 8 x i16> %out
}

define <vscale x 4 x i32> @subhnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
; CHECK-LABEL: subhnt_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    subhnt z0.s, z1.d, z2.d
; CHECK-NEXT:    ret
  %out = call <vscale x 4 x i32> @llvm.aarch64.sve.subhnt.nxv2i64(<vscale x 4 x i32> %a,
                                                                  <vscale x 2 x i64> %b,
                                                                  <vscale x 2 x i64> %c)
  ret <vscale x 4 x i32> %out
}


declare <vscale x 16 x i8> @llvm.aarch64.sve.addhnb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.addhnb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.addhnb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.addhnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.addhnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.addhnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.raddhnb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.raddhnb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.raddhnb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.raddhnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.raddhnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.raddhnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.subhnb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.subhnb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.subhnb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.subhnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.subhnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.subhnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.rsubhnb.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rsubhnb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rsubhnb.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)

declare <vscale x 16 x i8> @llvm.aarch64.sve.rsubhnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.rsubhnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.rsubhnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, <vscale x 2 x i64>)