xref: /llvm-project/llvm/test/CodeGen/ARM/neon-vqaddsub-upgrade.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
2
; vqadds8: loads two <8 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v8i8 and returns the result. The llc output is
; expected to contain vqadd.s8.
define <8 x i8> @vqadds8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqadds8:
;CHECK: vqadd.s8
entry:
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
11
; vqadds16: loads two <4 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v4i16 and returns the result. The llc output is
; expected to contain vqadd.s16.
define <4 x i16> @vqadds16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqadds16:
;CHECK: vqadd.s16
entry:
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
20
; vqadds32: loads two <2 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v2i32 and returns the result. The llc output is
; expected to contain vqadd.s32.
define <2 x i32> @vqadds32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqadds32:
;CHECK: vqadd.s32
entry:
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
29
; vqadds64: loads two <1 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v1i64 and returns the result. The llc output is
; expected to contain vqadd.s64.
define <1 x i64> @vqadds64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqadds64:
;CHECK: vqadd.s64
entry:
  %lhs = load <1 x i64>, ptr %A
  %rhs = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
  ret <1 x i64> %res
}
38
; vqaddu8: loads two <8 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v8i8 and returns the result. The llc output is
; expected to contain vqadd.u8.
define <8 x i8> @vqaddu8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddu8:
;CHECK: vqadd.u8
entry:
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
47
; vqaddu16: loads two <4 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v4i16 and returns the result. The llc output is
; expected to contain vqadd.u16.
define <4 x i16> @vqaddu16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddu16:
;CHECK: vqadd.u16
entry:
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
56
; vqaddu32: loads two <2 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v2i32 and returns the result. The llc output is
; expected to contain vqadd.u32.
define <2 x i32> @vqaddu32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddu32:
;CHECK: vqadd.u32
entry:
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
65
; vqaddu64: loads two <1 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v1i64 and returns the result. The llc output is
; expected to contain vqadd.u64.
define <1 x i64> @vqaddu64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddu64:
;CHECK: vqadd.u64
entry:
  %lhs = load <1 x i64>, ptr %A
  %rhs = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
  ret <1 x i64> %res
}
74
; vqaddQs8: loads two <16 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v16i8 and returns the result. The llc output is
; expected to contain vqadd.s8.
define <16 x i8> @vqaddQs8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQs8:
;CHECK: vqadd.s8
entry:
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
83
; vqaddQs16: loads two <8 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v8i16 and returns the result. The llc output is
; expected to contain vqadd.s16.
define <8 x i16> @vqaddQs16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQs16:
;CHECK: vqadd.s16
entry:
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
92
; vqaddQs32: loads two <4 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v4i32 and returns the result. The llc output is
; expected to contain vqadd.s32.
define <4 x i32> @vqaddQs32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQs32:
;CHECK: vqadd.s32
entry:
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
101
; vqaddQs64: loads two <2 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqadds.v2i64 and returns the result. The llc output is
; expected to contain vqadd.s64.
define <2 x i64> @vqaddQs64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQs64:
;CHECK: vqadd.s64
entry:
  %lhs = load <2 x i64>, ptr %A
  %rhs = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
110
; vqaddQu8: loads two <16 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v16i8 and returns the result. The llc output is
; expected to contain vqadd.u8.
define <16 x i8> @vqaddQu8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQu8:
;CHECK: vqadd.u8
entry:
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
119
; vqaddQu16: loads two <8 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v8i16 and returns the result. The llc output is
; expected to contain vqadd.u16.
define <8 x i16> @vqaddQu16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQu16:
;CHECK: vqadd.u16
entry:
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
128
; vqaddQu32: loads two <4 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v4i32 and returns the result. The llc output is
; expected to contain vqadd.u32.
define <4 x i32> @vqaddQu32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQu32:
;CHECK: vqadd.u32
entry:
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
137
; vqaddQu64: loads two <2 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqaddu.v2i64 and returns the result. The llc output is
; expected to contain vqadd.u64.
define <2 x i64> @vqaddQu64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqaddQu64:
;CHECK: vqadd.u64
entry:
  %lhs = load <2 x i64>, ptr %A
  %rhs = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
146
147
; vqsubs8: loads two <8 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v8i8 and returns the result. The llc output is
; expected to contain vqsub.s8.
define <8 x i8> @vqsubs8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubs8:
;CHECK: vqsub.s8
entry:
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
156
; vqsubs16: loads two <4 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v4i16 and returns the result. The llc output is
; expected to contain vqsub.s16.
define <4 x i16> @vqsubs16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubs16:
;CHECK: vqsub.s16
entry:
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
165
; vqsubs32: loads two <2 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v2i32 and returns the result. The llc output is
; expected to contain vqsub.s32.
define <2 x i32> @vqsubs32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubs32:
;CHECK: vqsub.s32
entry:
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
174
; vqsubs64: loads two <1 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v1i64 and returns the result. The llc output is
; expected to contain vqsub.s64.
define <1 x i64> @vqsubs64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubs64:
;CHECK: vqsub.s64
entry:
  %lhs = load <1 x i64>, ptr %A
  %rhs = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
  ret <1 x i64> %res
}
183
; vqsubu8: loads two <8 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v8i8 and returns the result. The llc output is
; expected to contain vqsub.u8.
define <8 x i8> @vqsubu8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubu8:
;CHECK: vqsub.u8
entry:
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %res = call <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %res
}
192
; vqsubu16: loads two <4 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v4i16 and returns the result. The llc output is
; expected to contain vqsub.u16.
define <4 x i16> @vqsubu16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubu16:
;CHECK: vqsub.u16
entry:
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %res = call <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %res
}
201
; vqsubu32: loads two <2 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v2i32 and returns the result. The llc output is
; expected to contain vqsub.u32.
define <2 x i32> @vqsubu32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubu32:
;CHECK: vqsub.u32
entry:
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %res = call <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %res
}
210
; vqsubu64: loads two <1 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v1i64 and returns the result. The llc output is
; expected to contain vqsub.u64.
define <1 x i64> @vqsubu64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubu64:
;CHECK: vqsub.u64
entry:
  %lhs = load <1 x i64>, ptr %A
  %rhs = load <1 x i64>, ptr %B
  %res = call <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64> %lhs, <1 x i64> %rhs)
  ret <1 x i64> %res
}
219
; vqsubQs8: loads two <16 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v16i8 and returns the result. The llc output is
; expected to contain vqsub.s8.
define <16 x i8> @vqsubQs8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQs8:
;CHECK: vqsub.s8
entry:
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
228
; vqsubQs16: loads two <8 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v8i16 and returns the result. The llc output is
; expected to contain vqsub.s16.
define <8 x i16> @vqsubQs16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQs16:
;CHECK: vqsub.s16
entry:
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
237
; vqsubQs32: loads two <4 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v4i32 and returns the result. The llc output is
; expected to contain vqsub.s32.
define <4 x i32> @vqsubQs32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQs32:
;CHECK: vqsub.s32
entry:
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
246
; vqsubQs64: loads two <2 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubs.v2i64 and returns the result. The llc output is
; expected to contain vqsub.s64.
define <2 x i64> @vqsubQs64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQs64:
;CHECK: vqsub.s64
entry:
  %lhs = load <2 x i64>, ptr %A
  %rhs = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
255
; vqsubQu8: loads two <16 x i8> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v16i8 and returns the result. The llc output is
; expected to contain vqsub.u8.
define <16 x i8> @vqsubQu8(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQu8:
;CHECK: vqsub.u8
entry:
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %res = call <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %res
}
264
; vqsubQu16: loads two <8 x i16> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v8i16 and returns the result. The llc output is
; expected to contain vqsub.u16.
define <8 x i16> @vqsubQu16(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQu16:
;CHECK: vqsub.u16
entry:
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %res = call <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %res
}
273
; vqsubQu32: loads two <4 x i32> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v4i32 and returns the result. The llc output is
; expected to contain vqsub.u32.
define <4 x i32> @vqsubQu32(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQu32:
;CHECK: vqsub.u32
entry:
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %res = call <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %res
}
282
; vqsubQu64: loads two <2 x i64> vectors through %A and %B, combines them with
; @llvm.arm.neon.vqsubu.v2i64 and returns the result. The llc output is
; expected to contain vqsub.u64.
define <2 x i64> @vqsubQu64(ptr %A, ptr %B) nounwind {
;CHECK-LABEL: vqsubQu64:
;CHECK: vqsub.u64
entry:
  %lhs = load <2 x i64>, ptr %A
  %rhs = load <2 x i64>, ptr %B
  %res = call <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64> %lhs, <2 x i64> %rhs)
  ret <2 x i64> %res
}
291
; Declarations of every intrinsic called by the test functions in this file.
; Each one takes two vectors of the same type and returns a vector of that
; type; all are marked nounwind readnone.

; vqadds, 64-bit vector types.
declare <8 x i8> @llvm.arm.neon.vqadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqadds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqadds.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

; vqaddu, 64-bit vector types.
declare <8 x i8> @llvm.arm.neon.vqaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqaddu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqaddu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

; vqadds, 128-bit vector types.
declare <16 x i8> @llvm.arm.neon.vqadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqadds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqadds.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; vqaddu, 128-bit vector types.
declare <16 x i8> @llvm.arm.neon.vqaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqaddu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqaddu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; vqsubs, 64-bit vector types.
declare <8 x i8> @llvm.arm.neon.vqsubs.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqsubs.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqsubs.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqsubs.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

; vqsubu, 64-bit vector types.
declare <8 x i8> @llvm.arm.neon.vqsubu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqsubu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqsubu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vqsubu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone

; vqsubs, 128-bit vector types.
declare <16 x i8> @llvm.arm.neon.vqsubs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqsubs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqsubs.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqsubs.v2i64(<2 x i64>, <2 x i64>) nounwind readnone

; vqsubu, 128-bit vector types.
declare <16 x i8> @llvm.arm.neon.vqsubu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vqsubu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vqsubu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vqsubu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
331