xref: /llvm-project/llvm/test/CodeGen/ARM/vabd.ll (revision ac02168990aa8429898d0c59fec7a78526638c5c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
3
; Verify that llvm.arm.neon.vabds.v8i8 selects a single vabd.s8 on D registers.
define <8 x i8> @vabds8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s8 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %abd
}
16
; Verify that llvm.arm.neon.vabds.v4i16 selects a single vabd.s16 on D registers.
define <4 x i16> @vabds16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s16 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %abd
}
29
; Verify that llvm.arm.neon.vabds.v2i32 selects a single vabd.s32 on D registers.
define <2 x i32> @vabds32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabds32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.s32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %abd
}
42
; Verify that llvm.arm.neon.vabdu.v8i8 selects a single vabd.u8 on D registers.
define <8 x i8> @vabdu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u8 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  ret <8 x i8> %abd
}
55
; Verify that llvm.arm.neon.vabdu.v4i16 selects a single vabd.u16 on D registers.
define <4 x i16> @vabdu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u16 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  ret <4 x i16> %abd
}
68
; Verify that llvm.arm.neon.vabdu.v2i32 selects a single vabd.u32 on D registers.
define <2 x i32> @vabdu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.u32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %abd
}
81
; Verify that llvm.arm.neon.vabds.v2f32 selects a single vabd.f32 on D registers.
define <2 x float> @vabdf32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabd.f32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <2 x float>, ptr %A
  %rhs = load <2 x float>, ptr %B
  %abd = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %abd
}
94
; Verify that llvm.arm.neon.vabds.v16i8 selects a single vabd.s8 on Q registers.
define <16 x i8> @vabdQs8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s8 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %abd = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %abd
}
107
; Verify that llvm.arm.neon.vabds.v8i16 selects a single vabd.s16 on Q registers.
define <8 x i16> @vabdQs16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s16 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %abd = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %abd
}
120
; Verify that llvm.arm.neon.vabds.v4i32 selects a single vabd.s32 on Q registers.
define <4 x i32> @vabdQs32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQs32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.s32 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %abd = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %abd
}
133
; Verify that llvm.arm.neon.vabdu.v16i8 selects a single vabd.u8 on Q registers.
define <16 x i8> @vabdQu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u8 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <16 x i8>, ptr %A
  %rhs = load <16 x i8>, ptr %B
  %abd = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %lhs, <16 x i8> %rhs)
  ret <16 x i8> %abd
}
146
; Verify that llvm.arm.neon.vabdu.v8i16 selects a single vabd.u16 on Q registers.
define <8 x i16> @vabdQu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u16 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <8 x i16>, ptr %A
  %rhs = load <8 x i16>, ptr %B
  %abd = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %abd
}
159
; Verify that llvm.arm.neon.vabdu.v4i32 selects a single vabd.u32 on Q registers.
define <4 x i32> @vabdQu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.u32 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i32>, ptr %A
  %rhs = load <4 x i32>, ptr %B
  %abd = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %abd
}
172
; Verify that llvm.arm.neon.vabds.v4f32 selects a single vabd.f32 on Q registers.
define <4 x float> @vabdQf32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdQf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vabd.f32 q0, q9, q8
; CHECK-NEXT:    bx lr
  %lhs = load <4 x float>, ptr %A
  %rhs = load <4 x float>, ptr %B
  %abd = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %abd
}
185
; Declarations of the NEON vabds/vabdu intrinsics called by the tests in this file.

; 64-bit vector forms, vabds on integer elements.
declare <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

; 64-bit vector forms, vabdu on integer elements.
declare <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

; 64-bit vector form, float elements (uses the vabds intrinsic name).
declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone

; 128-bit vector forms, vabds on integer elements.
declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; 128-bit vector forms, vabdu on integer elements.
declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone

; 128-bit vector form, float elements (uses the vabds intrinsic name).
declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
205
; Verify that zext of the llvm.arm.neon.vabds.v8i8 result folds into a single
; widening vabdl.s8.
define <8 x i16> @vabdls8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdls8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.s8 q0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %abd = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %wide = zext <8 x i8> %abd to <8 x i16>
  ret <8 x i16> %wide
}
219
; Verify that zext of the llvm.arm.neon.vabds.v4i16 result folds into a single
; widening vabdl.s16.
define <4 x i32> @vabdls16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdls16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.s16 q0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %abd = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %wide = zext <4 x i16> %abd to <4 x i32>
  ret <4 x i32> %wide
}
233
; Verify that zext of the llvm.arm.neon.vabds.v2i32 result folds into a single
; widening vabdl.s32.
define <2 x i64> @vabdls32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdls32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.s32 q0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %abd = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  %wide = zext <2 x i32> %abd to <2 x i64>
  ret <2 x i64> %wide
}
247
; Verify that zext of the llvm.arm.neon.vabdu.v8i8 result folds into a single
; widening vabdl.u8.
define <8 x i16> @vabdlu8(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdlu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.u8 q0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <8 x i8>, ptr %A
  %rhs = load <8 x i8>, ptr %B
  %abd = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %lhs, <8 x i8> %rhs)
  %wide = zext <8 x i8> %abd to <8 x i16>
  ret <8 x i16> %wide
}
261
; Verify that zext of the llvm.arm.neon.vabdu.v4i16 result folds into a single
; widening vabdl.u16.
define <4 x i32> @vabdlu16(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdlu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.u16 q0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <4 x i16>, ptr %A
  %rhs = load <4 x i16>, ptr %B
  %abd = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %wide = zext <4 x i16> %abd to <4 x i32>
  ret <4 x i32> %wide
}
275
; Verify that zext of the llvm.arm.neon.vabdu.v2i32 result folds into a single
; widening vabdl.u32.
define <2 x i64> @vabdlu32(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: vabdlu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vabdl.u32 q0, d17, d16
; CHECK-NEXT:    bx lr
  %lhs = load <2 x i32>, ptr %A
  %rhs = load <2 x i32>, ptr %B
  %abd = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  %wide = zext <2 x i32> %abd to <2 x i64>
  ret <2 x i64> %wide
}
289