; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7a-eabi -mattr=+neon -float-abi=hard %s -o - | FileCheck %s

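; Basic element-wise subtraction of 64-bit vectors should select the D-register forms of vsub.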
define <8 x i8> @vsubi8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <8 x i8> %A, %B
  ret <8 x i8> %tmp3
}

define <4 x i16> @vsubi16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <4 x i16> %A, %B
  ret <4 x i16> %tmp3
}

define <2 x i32> @vsubi32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <2 x i32> %A, %B
  ret <2 x i32> %tmp3
}

define <1 x i64> @vsubi64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: vsubi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <1 x i64> %A, %B
  ret <1 x i64> %tmp3
}

define <2 x float> @vsubf32(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: vsubf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.f32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = fsub <2 x float> %A, %B
  ret <2 x float> %tmp3
}

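; The 128-bit vector types should select the Q-register forms of vsub.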
define <16 x i8> @vsubQi8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: vsubQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <16 x i8> %A, %B
  ret <16 x i8> %tmp3
}

define <8 x i16> @vsubQi16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: vsubQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <8 x i16> %A, %B
  ret <8 x i16> %tmp3
}

define <4 x i32> @vsubQi32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: vsubQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <4 x i32> %A, %B
  ret <4 x i32> %tmp3
}

define <2 x i64> @vsubQi64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: vsubQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = sub <2 x i64> %A, %B
  ret <2 x i64> %tmp3
}

define <4 x float> @vsubQf32(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: vsubQf32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.f32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = fsub <4 x float> %A, %B
  ret <4 x float> %tmp3
}

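; vrsubhn (rounding subtract, returning the narrowed high half) is selected from the corresponding NEON intrinsic.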
define <8 x i8> @vrsubhni16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: vrsubhni16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrsubhn.i16 d0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %A, <8 x i16> %B)
  ret <8 x i8> %tmp3
}

define <4 x i16> @vrsubhni32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: vrsubhni32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrsubhn.i32 d0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %A, <4 x i32> %B)
  ret <4 x i16> %tmp3
}

define <2 x i32> @vrsubhni64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: vrsubhni64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vrsubhn.i64 d0, q0, q1
; CHECK-NEXT:    bx lr
  %tmp3 = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %A, <2 x i64> %B)
  ret <2 x i32> %tmp3
}

declare <8 x i8>  @llvm.arm.neon.vrsubhn.v8i8(<8 x i16>, <8 x i16>) readnone
declare <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32>, <4 x i32>) readnone
declare <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64>, <2 x i64>) readnone

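; vsubhn needs no intrinsic: a sub, a logical shift right by half the element width, and a trunc
; together select the narrowing-high-half instruction.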
define <8 x i8> @vsubhni16_natural(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: vsubhni16_natural:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubhn.i16 d0, q0, q1
; CHECK-NEXT:    bx lr
  %sum = sub <8 x i16> %A, %B
  %shift = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %trunc = trunc <8 x i16> %shift to <8 x i8>
  ret <8 x i8> %trunc
}

define <4 x i16> @vsubhni32_natural(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: vsubhni32_natural:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubhn.i32 d0, q0, q1
; CHECK-NEXT:    bx lr
  %sum = sub <4 x i32> %A, %B
  %shift = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
  %trunc = trunc <4 x i32> %shift to <4 x i16>
  ret <4 x i16> %trunc
}

define <2 x i32> @vsubhni64_natural(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: vsubhni64_natural:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubhn.i64 d0, q0, q1
; CHECK-NEXT:    bx lr
  %sum = sub <2 x i64> %A, %B
  %shift = lshr <2 x i64> %sum, <i64 32, i64 32>
  %trunc = trunc <2 x i64> %shift to <2 x i32>
  ret <2 x i32> %trunc
}

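; Widening subtraction: sign-extending both D-register operands before the sub should select vsubl.s8/s16/s32.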
define <8 x i16> @vsubls8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubls8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.s8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sext <8 x i8> %A to <8 x i16>
  %tmp4 = sext <8 x i8> %B to <8 x i16>
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vsubls16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubls16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.s16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sext <4 x i16> %A to <4 x i32>
  %tmp4 = sext <4 x i16> %B to <4 x i32>
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @vsubls32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubls32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.s32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = sext <2 x i32> %A to <2 x i64>
  %tmp4 = sext <2 x i32> %B to <2 x i64>
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

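; The same pattern with zero-extended operands should select the unsigned forms vsubl.u8/u16/u32.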
define <8 x i16> @vsublu8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsublu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %A to <8 x i16>
  %tmp4 = zext <8 x i8> %B to <8 x i16>
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <4 x i32> @vsublu16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsublu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %A to <4 x i32>
  %tmp4 = zext <4 x i16> %B to <4 x i32>
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <2 x i64> @vsublu32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsublu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %A to <2 x i64>
  %tmp4 = zext <2 x i32> %B to <2 x i64>
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  ret <2 x i64> %tmp5
}

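; Masking the widened result back to the original element width should still use vsubl.u*;
; the mask is applied afterwards (vbic, or vmov plus vand).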
define <8 x i16> @vsubla8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubla8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubl.u8 q0, d0, d1
; CHECK-NEXT:    vbic.i16 q0, #0xff00
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %A to <8 x i16>
  %tmp4 = zext <8 x i8> %B to <8 x i16>
  %tmp5 = sub <8 x i16> %tmp3, %tmp4
  %and = and <8 x i16> %tmp5, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @vsubla16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubla16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0xffff
; CHECK-NEXT:    vsubl.u16 q9, d0, d1
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %A to <4 x i32>
  %tmp4 = zext <4 x i16> %B to <4 x i32>
  %tmp5 = sub <4 x i32> %tmp3, %tmp4
  %and = and <4 x i32> %tmp5, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @vsubla32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubla32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
; CHECK-NEXT:    vsubl.u32 q9, d0, d1
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %A to <2 x i64>
  %tmp4 = zext <2 x i32> %B to <2 x i64>
  %tmp5 = sub <2 x i64> %tmp3, %tmp4
  %and = and <2 x i64> %tmp5, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}

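; Mixed-width subtraction: sign-extending only the narrow operand should select vsubw.s8/s16/s32.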
define <8 x i16> @vsubws8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubws8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.s8 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = sext <8 x i8> %B to <8 x i16>
  %tmp4 = sub <8 x i16> %A, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vsubws16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubws16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.s16 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = sext <4 x i16> %B to <4 x i32>
  %tmp4 = sub <4 x i32> %A, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vsubws32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubws32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.s32 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = sext <2 x i32> %B to <2 x i64>
  %tmp4 = sub <2 x i64> %A, %tmp3
  ret <2 x i64> %tmp4
}

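; The zero-extended variants should select vsubw.u8/u16/u32.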
define <8 x i16> @vsubwu8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubwu8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u8 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %B to <8 x i16>
  %tmp4 = sub <8 x i16> %A, %tmp3
  ret <8 x i16> %tmp4
}

define <4 x i32> @vsubwu16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubwu16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u16 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %B to <4 x i32>
  %tmp4 = sub <4 x i32> %A, %tmp3
  ret <4 x i32> %tmp4
}

define <2 x i64> @vsubwu32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubwu32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u32 q0, q0, d2
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %B to <2 x i64>
  %tmp4 = sub <2 x i64> %A, %tmp3
  ret <2 x i64> %tmp4
}

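; As with vsubl, masking the result should not block vsubw.u*; the mask is applied afterwards.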
define <8 x i16> @vsubwa8(<8 x i16> %A, <8 x i8> %B) {
; CHECK-LABEL: vsubwa8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsubw.u8 q0, q0, d2
; CHECK-NEXT:    vbic.i16 q0, #0xff00
; CHECK-NEXT:    bx lr
  %tmp3 = zext <8 x i8> %B to <8 x i16>
  %tmp4 = sub <8 x i16> %A, %tmp3
  %and = and <8 x i16> %tmp4, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  ret <8 x i16> %and
}

define <4 x i32> @vsubwa16(<4 x i32> %A, <4 x i16> %B) {
; CHECK-LABEL: vsubwa16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 q8, #0xffff
; CHECK-NEXT:    vsubw.u16 q9, q0, d2
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <4 x i16> %B to <4 x i32>
  %tmp4 = sub <4 x i32> %A, %tmp3
  %and = and <4 x i32> %tmp4, <i32 65535, i32 65535, i32 65535, i32 65535>
  ret <4 x i32> %and
}

define <2 x i64> @vsubwa32(<2 x i64> %A, <2 x i32> %B) {
; CHECK-LABEL: vsubwa32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i64 q8, #0xffffffff
; CHECK-NEXT:    vsubw.u32 q9, q0, d2
; CHECK-NEXT:    vand q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp3 = zext <2 x i32> %B to <2 x i64>
  %tmp4 = sub <2 x i64> %A, %tmp3
  %and = and <2 x i64> %tmp4, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %and
}