xref: /llvm-project/llvm/test/CodeGen/ARM/vaba.ll (revision ac02168990aa8429898d0c59fec7a78526638c5c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s
3
; vabas8: loads <8 x i8> values from %A, %B and %C, calls
; @llvm.arm.neon.vabds.v8i8 on the %B and %C values, and adds the result to
; the %A value. The autogenerated assertions below expect three vldr loads
; followed by a single "vaba.s8" on D registers, i.e. the intrinsic call and
; the add are emitted as one instruction.
4define <8 x i8> @vabas8(ptr %A, ptr %B, ptr %C) nounwind {
5; CHECK-LABEL: vabas8:
6; CHECK:       @ %bb.0:
7; CHECK-NEXT:    vldr d16, [r2]
8; CHECK-NEXT:    vldr d17, [r1]
9; CHECK-NEXT:    vldr d0, [r0]
10; CHECK-NEXT:    vaba.s8 d0, d17, d16
11; CHECK-NEXT:    bx lr
12	%tmp1 = load <8 x i8>, ptr %A
13	%tmp2 = load <8 x i8>, ptr %B
14	%tmp3 = load <8 x i8>, ptr %C
15	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
16	%tmp5 = add <8 x i8> %tmp1, %tmp4
17	ret <8 x i8> %tmp5
18}
19
; vabas16: same shape as a load/intrinsic/add test, on <4 x i16> vectors.
; The %B and %C values go through @llvm.arm.neon.vabds.v4i16 and the result
; is added to the %A value. The autogenerated assertions expect a single
; "vaba.s16" on D registers after the three vldr loads.
20define <4 x i16> @vabas16(ptr %A, ptr %B, ptr %C) nounwind {
21; CHECK-LABEL: vabas16:
22; CHECK:       @ %bb.0:
23; CHECK-NEXT:    vldr d16, [r2]
24; CHECK-NEXT:    vldr d17, [r1]
25; CHECK-NEXT:    vldr d0, [r0]
26; CHECK-NEXT:    vaba.s16 d0, d17, d16
27; CHECK-NEXT:    bx lr
28	%tmp1 = load <4 x i16>, ptr %A
29	%tmp2 = load <4 x i16>, ptr %B
30	%tmp3 = load <4 x i16>, ptr %C
31	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
32	%tmp5 = add <4 x i16> %tmp1, %tmp4
33	ret <4 x i16> %tmp5
34}
35
; vabas32: loads <2 x i32> values from %A, %B and %C, calls
; @llvm.arm.neon.vabds.v2i32 on the %B and %C values, and adds the result to
; the %A value. The autogenerated assertions expect a single "vaba.s32" on
; D registers after the three vldr loads.
36define <2 x i32> @vabas32(ptr %A, ptr %B, ptr %C) nounwind {
37; CHECK-LABEL: vabas32:
38; CHECK:       @ %bb.0:
39; CHECK-NEXT:    vldr d16, [r2]
40; CHECK-NEXT:    vldr d17, [r1]
41; CHECK-NEXT:    vldr d0, [r0]
42; CHECK-NEXT:    vaba.s32 d0, d17, d16
43; CHECK-NEXT:    bx lr
44	%tmp1 = load <2 x i32>, ptr %A
45	%tmp2 = load <2 x i32>, ptr %B
46	%tmp3 = load <2 x i32>, ptr %C
47	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
48	%tmp5 = add <2 x i32> %tmp1, %tmp4
49	ret <2 x i32> %tmp5
50}
51
; vabau8: loads <8 x i8> values from %A, %B and %C, calls
; @llvm.arm.neon.vabdu.v8i8 (the "u" intrinsic variant) on the %B and %C
; values, and adds the result to the %A value. The autogenerated assertions
; expect a single "vaba.u8" on D registers after the three vldr loads.
52define <8 x i8> @vabau8(ptr %A, ptr %B, ptr %C) nounwind {
53; CHECK-LABEL: vabau8:
54; CHECK:       @ %bb.0:
55; CHECK-NEXT:    vldr d16, [r2]
56; CHECK-NEXT:    vldr d17, [r1]
57; CHECK-NEXT:    vldr d0, [r0]
58; CHECK-NEXT:    vaba.u8 d0, d17, d16
59; CHECK-NEXT:    bx lr
60	%tmp1 = load <8 x i8>, ptr %A
61	%tmp2 = load <8 x i8>, ptr %B
62	%tmp3 = load <8 x i8>, ptr %C
63	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
64	%tmp5 = add <8 x i8> %tmp1, %tmp4
65	ret <8 x i8> %tmp5
66}
67
; vabau16: <4 x i16> test using @llvm.arm.neon.vabdu.v4i16. The intrinsic
; is applied to the values loaded from %B and %C and its result is added to
; the value loaded from %A. The autogenerated assertions expect a single
; "vaba.u16" on D registers after the three vldr loads.
68define <4 x i16> @vabau16(ptr %A, ptr %B, ptr %C) nounwind {
69; CHECK-LABEL: vabau16:
70; CHECK:       @ %bb.0:
71; CHECK-NEXT:    vldr d16, [r2]
72; CHECK-NEXT:    vldr d17, [r1]
73; CHECK-NEXT:    vldr d0, [r0]
74; CHECK-NEXT:    vaba.u16 d0, d17, d16
75; CHECK-NEXT:    bx lr
76	%tmp1 = load <4 x i16>, ptr %A
77	%tmp2 = load <4 x i16>, ptr %B
78	%tmp3 = load <4 x i16>, ptr %C
79	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
80	%tmp5 = add <4 x i16> %tmp1, %tmp4
81	ret <4 x i16> %tmp5
82}
83
; vabau32: <2 x i32> test using @llvm.arm.neon.vabdu.v2i32. The intrinsic
; is applied to the values loaded from %B and %C and its result is added to
; the value loaded from %A. The autogenerated assertions expect a single
; "vaba.u32" on D registers after the three vldr loads.
84define <2 x i32> @vabau32(ptr %A, ptr %B, ptr %C) nounwind {
85; CHECK-LABEL: vabau32:
86; CHECK:       @ %bb.0:
87; CHECK-NEXT:    vldr d16, [r2]
88; CHECK-NEXT:    vldr d17, [r1]
89; CHECK-NEXT:    vldr d0, [r0]
90; CHECK-NEXT:    vaba.u32 d0, d17, d16
91; CHECK-NEXT:    bx lr
92	%tmp1 = load <2 x i32>, ptr %A
93	%tmp2 = load <2 x i32>, ptr %B
94	%tmp3 = load <2 x i32>, ptr %C
95	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
96	%tmp5 = add <2 x i32> %tmp1, %tmp4
97	ret <2 x i32> %tmp5
98}
99
; vabaQs8: <16 x i8> test. Calls @llvm.arm.neon.vabds.v16i8 on the values
; loaded from %B and %C and adds the result to the value loaded from %A.
; The autogenerated assertions expect three vld1.64 loads of D-register
; pairs followed by a single "vaba.s8" on Q registers.
100define <16 x i8> @vabaQs8(ptr %A, ptr %B, ptr %C) nounwind {
101; CHECK-LABEL: vabaQs8:
102; CHECK:       @ %bb.0:
103; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
104; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
105; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
106; CHECK-NEXT:    vaba.s8 q0, q9, q8
107; CHECK-NEXT:    bx lr
108	%tmp1 = load <16 x i8>, ptr %A
109	%tmp2 = load <16 x i8>, ptr %B
110	%tmp3 = load <16 x i8>, ptr %C
111	%tmp4 = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
112	%tmp5 = add <16 x i8> %tmp1, %tmp4
113	ret <16 x i8> %tmp5
114}
115
; vabaQs16: <8 x i16> test. Calls @llvm.arm.neon.vabds.v8i16 on the values
; loaded from %B and %C and adds the result to the value loaded from %A.
; The autogenerated assertions expect three vld1.64 loads of D-register
; pairs followed by a single "vaba.s16" on Q registers.
116define <8 x i16> @vabaQs16(ptr %A, ptr %B, ptr %C) nounwind {
117; CHECK-LABEL: vabaQs16:
118; CHECK:       @ %bb.0:
119; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
120; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
121; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
122; CHECK-NEXT:    vaba.s16 q0, q9, q8
123; CHECK-NEXT:    bx lr
124	%tmp1 = load <8 x i16>, ptr %A
125	%tmp2 = load <8 x i16>, ptr %B
126	%tmp3 = load <8 x i16>, ptr %C
127	%tmp4 = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
128	%tmp5 = add <8 x i16> %tmp1, %tmp4
129	ret <8 x i16> %tmp5
130}
131
; vabaQs32: <4 x i32> test. Calls @llvm.arm.neon.vabds.v4i32 on the values
; loaded from %B and %C and adds the result to the value loaded from %A.
; The autogenerated assertions expect three vld1.64 loads of D-register
; pairs followed by a single "vaba.s32" on Q registers.
132define <4 x i32> @vabaQs32(ptr %A, ptr %B, ptr %C) nounwind {
133; CHECK-LABEL: vabaQs32:
134; CHECK:       @ %bb.0:
135; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
136; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
137; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
138; CHECK-NEXT:    vaba.s32 q0, q9, q8
139; CHECK-NEXT:    bx lr
140	%tmp1 = load <4 x i32>, ptr %A
141	%tmp2 = load <4 x i32>, ptr %B
142	%tmp3 = load <4 x i32>, ptr %C
143	%tmp4 = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
144	%tmp5 = add <4 x i32> %tmp1, %tmp4
145	ret <4 x i32> %tmp5
146}
147
; vabaQu8: <16 x i8> test using @llvm.arm.neon.vabdu.v16i8. The intrinsic
; is applied to the values loaded from %B and %C and its result is added to
; the value loaded from %A. The autogenerated assertions expect three
; vld1.64 loads of D-register pairs followed by a single "vaba.u8" on
; Q registers.
148define <16 x i8> @vabaQu8(ptr %A, ptr %B, ptr %C) nounwind {
149; CHECK-LABEL: vabaQu8:
150; CHECK:       @ %bb.0:
151; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
152; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
153; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
154; CHECK-NEXT:    vaba.u8 q0, q9, q8
155; CHECK-NEXT:    bx lr
156	%tmp1 = load <16 x i8>, ptr %A
157	%tmp2 = load <16 x i8>, ptr %B
158	%tmp3 = load <16 x i8>, ptr %C
159	%tmp4 = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %tmp2, <16 x i8> %tmp3)
160	%tmp5 = add <16 x i8> %tmp1, %tmp4
161	ret <16 x i8> %tmp5
162}
163
; vabaQu16: <8 x i16> test using @llvm.arm.neon.vabdu.v8i16. The intrinsic
; is applied to the values loaded from %B and %C and its result is added to
; the value loaded from %A. The autogenerated assertions expect three
; vld1.64 loads of D-register pairs followed by a single "vaba.u16" on
; Q registers.
164define <8 x i16> @vabaQu16(ptr %A, ptr %B, ptr %C) nounwind {
165; CHECK-LABEL: vabaQu16:
166; CHECK:       @ %bb.0:
167; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
168; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
169; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
170; CHECK-NEXT:    vaba.u16 q0, q9, q8
171; CHECK-NEXT:    bx lr
172	%tmp1 = load <8 x i16>, ptr %A
173	%tmp2 = load <8 x i16>, ptr %B
174	%tmp3 = load <8 x i16>, ptr %C
175	%tmp4 = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp3)
176	%tmp5 = add <8 x i16> %tmp1, %tmp4
177	ret <8 x i16> %tmp5
178}
179
; vabaQu32: <4 x i32> test using @llvm.arm.neon.vabdu.v4i32. The intrinsic
; is applied to the values loaded from %B and %C and its result is added to
; the value loaded from %A. The autogenerated assertions expect three
; vld1.64 loads of D-register pairs followed by a single "vaba.u32" on
; Q registers.
180define <4 x i32> @vabaQu32(ptr %A, ptr %B, ptr %C) nounwind {
181; CHECK-LABEL: vabaQu32:
182; CHECK:       @ %bb.0:
183; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
184; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
185; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
186; CHECK-NEXT:    vaba.u32 q0, q9, q8
187; CHECK-NEXT:    bx lr
188	%tmp1 = load <4 x i32>, ptr %A
189	%tmp2 = load <4 x i32>, ptr %B
190	%tmp3 = load <4 x i32>, ptr %C
191	%tmp4 = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3)
192	%tmp5 = add <4 x i32> %tmp1, %tmp4
193	ret <4 x i32> %tmp5
194}
195
; Declarations of the NEON intrinsics called by the test functions in this
; file: llvm.arm.neon.vabds and llvm.arm.neon.vabdu, each declared for the
; 64-bit vector types (v8i8, v4i16, v2i32) and the 128-bit vector types
; (v16i8, v8i16, v4i32). Every declaration takes two operands of the result
; type and is marked nounwind readnone.
196declare <8 x i8>  @llvm.arm.neon.vabds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
197declare <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
198declare <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
199
200declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
201declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
202declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
203
204declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
205declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
206declare <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
207
208declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
209declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
210declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
211
; vabals8: widening variant. Loads an <8 x i16> value from %A and <8 x i8>
; values from %B and %C, calls @llvm.arm.neon.vabds.v8i8 on the %B and %C
; values, zero-extends the result to <8 x i16> and adds it to the %A value.
; The autogenerated assertions expect two vldr loads, one vld1.64 load and a
; single "vabal.s8 q0, d17, d16".
212define <8 x i16> @vabals8(ptr %A, ptr %B, ptr %C) nounwind {
213; CHECK-LABEL: vabals8:
214; CHECK:       @ %bb.0:
215; CHECK-NEXT:    vldr d16, [r2]
216; CHECK-NEXT:    vldr d17, [r1]
217; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
218; CHECK-NEXT:    vabal.s8 q0, d17, d16
219; CHECK-NEXT:    bx lr
220	%tmp1 = load <8 x i16>, ptr %A
221	%tmp2 = load <8 x i8>, ptr %B
222	%tmp3 = load <8 x i8>, ptr %C
223	%tmp4 = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
; Widen the intrinsic result with zext before accumulating.
224	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
225	%tmp6 = add <8 x i16> %tmp1, %tmp5
226	ret <8 x i16> %tmp6
227}
228
; vabals16: widening variant. Loads a <4 x i32> value from %A and <4 x i16>
; values from %B and %C, calls @llvm.arm.neon.vabds.v4i16 on the %B and %C
; values, zero-extends the result to <4 x i32> and adds it to the %A value.
; The autogenerated assertions expect two vldr loads, one vld1.64 load and a
; single "vabal.s16 q0, d17, d16".
229define <4 x i32> @vabals16(ptr %A, ptr %B, ptr %C) nounwind {
230; CHECK-LABEL: vabals16:
231; CHECK:       @ %bb.0:
232; CHECK-NEXT:    vldr d16, [r2]
233; CHECK-NEXT:    vldr d17, [r1]
234; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
235; CHECK-NEXT:    vabal.s16 q0, d17, d16
236; CHECK-NEXT:    bx lr
237	%tmp1 = load <4 x i32>, ptr %A
238	%tmp2 = load <4 x i16>, ptr %B
239	%tmp3 = load <4 x i16>, ptr %C
240	%tmp4 = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
; Widen the intrinsic result with zext before accumulating.
241	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
242	%tmp6 = add <4 x i32> %tmp1, %tmp5
243	ret <4 x i32> %tmp6
244}
245
; vabals32: widening variant. Loads a <2 x i64> value from %A and <2 x i32>
; values from %B and %C, calls @llvm.arm.neon.vabds.v2i32 on the %B and %C
; values, zero-extends the result to <2 x i64> and adds it to the %A value.
; The autogenerated assertions expect two vldr loads, one vld1.64 load and a
; single "vabal.s32 q0, d17, d16".
246define <2 x i64> @vabals32(ptr %A, ptr %B, ptr %C) nounwind {
247; CHECK-LABEL: vabals32:
248; CHECK:       @ %bb.0:
249; CHECK-NEXT:    vldr d16, [r2]
250; CHECK-NEXT:    vldr d17, [r1]
251; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
252; CHECK-NEXT:    vabal.s32 q0, d17, d16
253; CHECK-NEXT:    bx lr
254	%tmp1 = load <2 x i64>, ptr %A
255	%tmp2 = load <2 x i32>, ptr %B
256	%tmp3 = load <2 x i32>, ptr %C
257	%tmp4 = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
; Widen the intrinsic result with zext before accumulating.
258	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
259	%tmp6 = add <2 x i64> %tmp1, %tmp5
260	ret <2 x i64> %tmp6
261}
262
; vabalu8: widening variant using @llvm.arm.neon.vabdu.v8i8. Loads an
; <8 x i16> value from %A and <8 x i8> values from %B and %C, applies the
; intrinsic to the %B and %C values, zero-extends the result to <8 x i16>
; and adds it to the %A value. The autogenerated assertions expect two vldr
; loads, one vld1.64 load and a single "vabal.u8 q0, d17, d16".
263define <8 x i16> @vabalu8(ptr %A, ptr %B, ptr %C) nounwind {
264; CHECK-LABEL: vabalu8:
265; CHECK:       @ %bb.0:
266; CHECK-NEXT:    vldr d16, [r2]
267; CHECK-NEXT:    vldr d17, [r1]
268; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
269; CHECK-NEXT:    vabal.u8 q0, d17, d16
270; CHECK-NEXT:    bx lr
271	%tmp1 = load <8 x i16>, ptr %A
272	%tmp2 = load <8 x i8>, ptr %B
273	%tmp3 = load <8 x i8>, ptr %C
274	%tmp4 = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %tmp2, <8 x i8> %tmp3)
; Widen the intrinsic result with zext before accumulating.
275	%tmp5 = zext <8 x i8> %tmp4 to <8 x i16>
276	%tmp6 = add <8 x i16> %tmp1, %tmp5
277	ret <8 x i16> %tmp6
278}
279
; vabalu16: widening variant using @llvm.arm.neon.vabdu.v4i16. Loads a
; <4 x i32> value from %A and <4 x i16> values from %B and %C, applies the
; intrinsic to the %B and %C values, zero-extends the result to <4 x i32>
; and adds it to the %A value. The autogenerated assertions expect two vldr
; loads, one vld1.64 load and a single "vabal.u16 q0, d17, d16".
280define <4 x i32> @vabalu16(ptr %A, ptr %B, ptr %C) nounwind {
281; CHECK-LABEL: vabalu16:
282; CHECK:       @ %bb.0:
283; CHECK-NEXT:    vldr d16, [r2]
284; CHECK-NEXT:    vldr d17, [r1]
285; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
286; CHECK-NEXT:    vabal.u16 q0, d17, d16
287; CHECK-NEXT:    bx lr
288	%tmp1 = load <4 x i32>, ptr %A
289	%tmp2 = load <4 x i16>, ptr %B
290	%tmp3 = load <4 x i16>, ptr %C
291	%tmp4 = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %tmp2, <4 x i16> %tmp3)
; Widen the intrinsic result with zext before accumulating.
292	%tmp5 = zext <4 x i16> %tmp4 to <4 x i32>
293	%tmp6 = add <4 x i32> %tmp1, %tmp5
294	ret <4 x i32> %tmp6
295}
296
; vabalu32: widening variant using @llvm.arm.neon.vabdu.v2i32. Loads a
; <2 x i64> value from %A and <2 x i32> values from %B and %C, applies the
; intrinsic to the %B and %C values, zero-extends the result to <2 x i64>
; and adds it to the %A value. The autogenerated assertions expect two vldr
; loads, one vld1.64 load and a single "vabal.u32 q0, d17, d16".
297define <2 x i64> @vabalu32(ptr %A, ptr %B, ptr %C) nounwind {
298; CHECK-LABEL: vabalu32:
299; CHECK:       @ %bb.0:
300; CHECK-NEXT:    vldr d16, [r2]
301; CHECK-NEXT:    vldr d17, [r1]
302; CHECK-NEXT:    vld1.64 {d0, d1}, [r0]
303; CHECK-NEXT:    vabal.u32 q0, d17, d16
304; CHECK-NEXT:    bx lr
305	%tmp1 = load <2 x i64>, ptr %A
306	%tmp2 = load <2 x i32>, ptr %B
307	%tmp3 = load <2 x i32>, ptr %C
308	%tmp4 = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %tmp2, <2 x i32> %tmp3)
; Widen the intrinsic result with zext before accumulating.
309	%tmp5 = zext <2 x i32> %tmp4 to <2 x i64>
310	%tmp6 = add <2 x i64> %tmp1, %tmp5
311	ret <2 x i64> %tmp6
312}
313