; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7a-eabihf -mattr=+neon %s -o - | FileCheck %s

;
; SABD
;

; Signed absdiff, v8i8: sext to i16, sub, abs, trunc should select a single vabd.s8.
define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sabd_8b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

; Signed absdiff, v16i8 (q-register form): selects vabd.s8 on q registers.
define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sabd_16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.sext = sext <16 x i8> %a to <16 x i16>
  %b.sext = sext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.sext, %b.sext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

; Signed absdiff, v4i16: widen/sub/abs/trunc folds to vabd.s16.
define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: sabd_4h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

; v4i8 is not a legal type: operands are promoted via shl/ashr sign-extension
; before the vabd.s16, so extra shift instructions are expected.
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: sabd_4h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vshl.i16 d16, d1, #8
; CHECK-NEXT:    vshl.i16 d17, d0, #8
; CHECK-NEXT:    vshr.s16 d16, d16, #8
; CHECK-NEXT:    vshr.s16 d17, d17, #8
; CHECK-NEXT:    vabd.s16 d0, d17, d16
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i8> %a to <4 x i16>
  %b.sext = sext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.sext, %b.sext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

; Signed absdiff, v8i16: selects vabd.s16 on q registers.
define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: sabd_8h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.sext = sext <8 x i16> %a to <8 x i32>
  %b.sext = sext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.sext, %b.sext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

; Widening absdiff: v8i8 inputs with a v8i16 result select the long-form vabdl.s8.
define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: sabd_8h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.s8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <8 x i8> %a to <8 x i16>
  %b.sext = sext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.sext, %b.sext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

; Signed absdiff, v2i32: widen/sub/abs/trunc folds to vabd.s32.
define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: sabd_2s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

; v2i16 is not legal: operands are sign-extended in-register with shl/ashr
; pairs before the vabd.s32.
define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: sabd_2s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vshl.i32 d16, d1, #16
; CHECK-NEXT:    vshl.i32 d17, d0, #16
; CHECK-NEXT:    vshr.s32 d16, d16, #16
; CHECK-NEXT:    vshr.s32 d17, d17, #16
; CHECK-NEXT:    vabd.s32 d0, d17, d16
; CHECK-NEXT:    bx lr
  %a.sext = sext <2 x i16> %a to <2 x i32>
  %b.sext = sext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.sext, %b.sext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

; Signed absdiff, v4i32: selects vabd.s32 on q registers.
define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: sabd_4s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i32> %a to <4 x i64>
  %b.sext = sext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.sext, %b.sext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

; Widening absdiff: v4i16 inputs with a v4i32 result select vabdl.s16.
define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: sabd_4s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.s16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <4 x i16> %a to <4 x i32>
  %b.sext = sext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.sext, %b.sext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

; v2i64 signed absdiff via i128 arithmetic: NEON has no 64-bit vabd, so this
; expands to a scalar compare sequence plus vector sub/xor (negate-if-negative).
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_2d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    vmov r12, lr, d0
; CHECK-NEXT:    vmov r4, r5, d2
; CHECK-NEXT:    vsub.i64 q8, q0, q1
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs r0, r3, r1
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movwlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    subs r1, r4, r12
; CHECK-NEXT:    sbcs r1, r5, lr
; CHECK-NEXT:    vdup.32 d19, r0
; CHECK-NEXT:    movwlt r6, #1
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    vdup.32 d18, r6
; CHECK-NEXT:    veor q8, q8, q9
; CHECK-NEXT:    vsub.i64 q0, q9, q8
; CHECK-NEXT:    pop {r4, r5, r6, pc}
  %a.sext = sext <2 x i64> %a to <2 x i128>
  %b.sext = sext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.sext, %b.sext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

; Widening absdiff: v2i32 inputs with a v2i64 result select vabdl.s32.
define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: sabd_2d_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.s32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.sext = sext <2 x i32> %a to <2 x i64>
  %b.sext = sext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.sext, %b.sext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

;
; UABD
;

; Unsigned absdiff, v8i8: zext/sub/abs/trunc folds to vabd.u8.
define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: uabd_8b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  %trunc = trunc <8 x i16> %abs to <8 x i8>
  ret <8 x i8> %trunc
}

; Unsigned absdiff, v16i8: selects vabd.u8 on q registers.
define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: uabd_16b:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.zext = zext <16 x i8> %a to <16 x i16>
  %b.zext = zext <16 x i8> %b to <16 x i16>
  %sub = sub <16 x i16> %a.zext, %b.zext
  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
  %trunc = trunc <16 x i16> %abs to <16 x i8>
  ret <16 x i8> %trunc
}

; Unsigned absdiff, v4i16: folds to vabd.u16.
define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: uabd_4h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  %trunc = trunc <4 x i32> %abs to <4 x i16>
  ret <4 x i16> %trunc
}

; v4i8 is not legal: operands are zero-extended by masking the high byte
; (vbic #0xff00) before the vabd.u16.
define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: uabd_4h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbic.i16 d1, #0xff00
; CHECK-NEXT:    vbic.i16 d0, #0xff00
; CHECK-NEXT:    vabd.u16 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i8> %a to <4 x i16>
  %b.zext = zext <4 x i8> %b to <4 x i16>
  %sub = sub <4 x i16> %a.zext, %b.zext
  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
  ret <4 x i16> %abs
}

; Unsigned absdiff, v8i16: selects vabd.u16 on q registers.
define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: uabd_8h:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.zext = zext <8 x i16> %a to <8 x i32>
  %b.zext = zext <8 x i16> %b to <8 x i32>
  %sub = sub <8 x i32> %a.zext, %b.zext
  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
  %trunc = trunc <8 x i32> %abs to <8 x i16>
  ret <8 x i16> %trunc
}

; Widening absdiff: v8i8 inputs with a v8i16 result select vabdl.u8.
define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: uabd_8h_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.u8 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <8 x i8> %a to <8 x i16>
  %b.zext = zext <8 x i8> %b to <8 x i16>
  %sub = sub <8 x i16> %a.zext, %b.zext
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

; Unsigned absdiff, v2i32: folds to vabd.u32.
define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: uabd_2s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u32 d0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  %trunc = trunc <2 x i64> %abs to <2 x i32>
  ret <2 x i32> %trunc
}

; v2i16 is not legal: operands are zero-extended by masking with 0xffff
; (vand against a splat constant) before the vabd.u32.
define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) {
; CHECK-LABEL: uabd_2s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0xffff
; CHECK-NEXT:    vand d17, d1, d16
; CHECK-NEXT:    vand d16, d0, d16
; CHECK-NEXT:    vabd.u32 d0, d16, d17
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i16> %a to <2 x i32>
  %b.zext = zext <2 x i16> %b to <2 x i32>
  %sub = sub <2 x i32> %a.zext, %b.zext
  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
  ret <2 x i32> %abs
}

; Unsigned absdiff, v4i32: selects vabd.u32 on q registers.
define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: uabd_4s:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i32> %a to <4 x i64>
  %b.zext = zext <4 x i32> %b to <4 x i64>
  %sub = sub <4 x i64> %a.zext, %b.zext
  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
  %trunc = trunc <4 x i64> %abs to <4 x i32>
  ret <4 x i32> %trunc
}

; Widening absdiff: v4i16 inputs with a v4i32 result select vabdl.u16.
define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) {
; CHECK-LABEL: uabd_4s_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.u16 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <4 x i16> %a to <4 x i32>
  %b.zext = zext <4 x i16> %b to <4 x i32>
  %sub = sub <4 x i32> %a.zext, %b.zext
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

; v2i64 unsigned absdiff: lowered with the saturating-subtract trick,
; |a-b| = (a -sat b) | (b -sat a), avoiding any 64-bit compare.
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_2d:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqsub.u64 q8, q1, q0
; CHECK-NEXT:    vqsub.u64 q9, q0, q1
; CHECK-NEXT:    vorr q0, q9, q8
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i64> %a to <2 x i128>
  %b.zext = zext <2 x i64> %b to <2 x i128>
  %sub = sub <2 x i128> %a.zext, %b.zext
  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
  %trunc = trunc <2 x i128> %abs to <2 x i64>
  ret <2 x i64> %trunc
}

; Widening absdiff: v2i32 inputs with a v2i64 result select vabdl.u32.
define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) {
; CHECK-LABEL: uabd_2d_promoted_ops:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabdl.u32 q0, d0, d1
; CHECK-NEXT:    bx lr
  %a.zext = zext <2 x i32> %a to <2 x i64>
  %b.zext = zext <2 x i32> %b to <2 x i64>
  %sub = sub <2 x i64> %a.zext, %b.zext
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

; abs(sub nuw): currently lowered as separate vsub + vabs rather than vabd.
define <16 x i8> @uabd_v16i8_nuw(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: uabd_v16i8_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i8 q8, q0, q1
; CHECK-NEXT:    vabs.s8 q0, q8
; CHECK-NEXT:    bx lr
  %sub = sub nuw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

; abs(sub nuw), v8i16: lowered as vsub + vabs.
define <8 x i16> @uabd_v8i16_nuw(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: uabd_v8i16_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i16 q8, q0, q1
; CHECK-NEXT:    vabs.s16 q0, q8
; CHECK-NEXT:    bx lr
  %sub = sub nuw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

; abs(sub nuw), v4i32: lowered as vsub + vabs.
define <4 x i32> @uabd_v4i32_nuw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: uabd_v4i32_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i32 q8, q0, q1
; CHECK-NEXT:    vabs.s32 q0, q8
; CHECK-NEXT:    bx lr
  %sub = sub nuw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

; abs(sub nuw), v2i64: no 64-bit vabs, so abs expands to shift/xor/sub.
define <2 x i64> @uabd_v2i64_nuw(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: uabd_v2i64_nuw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 q8, q0, q1
; CHECK-NEXT:    vshr.s64 q9, q8, #63
; CHECK-NEXT:    veor q8, q8, q9
; CHECK-NEXT:    vsub.i64 q0, q8, q9
; CHECK-NEXT:    bx lr
  %sub = sub nuw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

; abs(sub nsw) folds directly to vabd.s8.
define <16 x i8> @sabd_v16i8_nsw(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: sabd_v16i8_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <16 x i8> %a, %b
  %abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
  ret <16 x i8> %abs
}

; abs(sub nsw), v8i16: folds to vabd.s16.
define <8 x i16> @sabd_v8i16_nsw(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: sabd_v8i16_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <8 x i16> %a, %b
  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
  ret <8 x i16> %abs
}

; abs(sub nsw), v4i32: folds to vabd.s32.
define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: sabd_v4i32_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %sub = sub nsw <4 x i32> %a, %b
  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
  ret <4 x i32> %abs
}

; abs(sub nsw), v2i64: no 64-bit vabd, so abs expands to shift/xor/sub.
define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: sabd_v2i64_nsw:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vsub.i64 q8, q0, q1
; CHECK-NEXT:    vshr.s64 q9, q8, #63
; CHECK-NEXT:    veor q8, q8, q9
; CHECK-NEXT:    vsub.i64 q0, q8, q9
; CHECK-NEXT:    bx lr
  %sub = sub nsw <2 x i64> %a, %b
  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
  ret <2 x i64> %abs
}

; smax(a,b) - smin(a,b) is recognized as signed absdiff -> vabd.s8.
define <16 x i8> @smaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: smaxmin_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <16 x i8> @llvm.smax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.smin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

; smax - smin, v8i16: folds to vabd.s16.
define <8 x i16> @smaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: smaxmin_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <8 x i16> @llvm.smax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.smin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

; smax - smin, v4i32: folds to vabd.s32.
define <4 x i32> @smaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: smaxmin_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.s32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <4 x i32> @llvm.smax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

; smax - smin, v2i64: no 64-bit vabd, so the absdiff is expanded with scalar
; compares feeding a vector xor/sub negate sequence (same shape as sabd_2d).
define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: smaxmin_v2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    mov r6, #0
; CHECK-NEXT:    vmov r2, r3, d3
; CHECK-NEXT:    vmov r12, lr, d0
; CHECK-NEXT:    vmov r4, r5, d2
; CHECK-NEXT:    vsub.i64 q8, q0, q1
; CHECK-NEXT:    subs r0, r2, r0
; CHECK-NEXT:    sbcs r0, r3, r1
; CHECK-NEXT:    mov r0, #0
; CHECK-NEXT:    movwlt r0, #1
; CHECK-NEXT:    cmp r0, #0
; CHECK-NEXT:    mvnne r0, #0
; CHECK-NEXT:    subs r1, r4, r12
; CHECK-NEXT:    sbcs r1, r5, lr
; CHECK-NEXT:    vdup.32 d19, r0
; CHECK-NEXT:    movwlt r6, #1
; CHECK-NEXT:    cmp r6, #0
; CHECK-NEXT:    mvnne r6, #0
; CHECK-NEXT:    vdup.32 d18, r6
; CHECK-NEXT:    veor q8, q8, q9
; CHECK-NEXT:    vsub.i64 q0, q9, q8
; CHECK-NEXT:    pop {r4, r5, r6, pc}
  %a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.smin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

; umax(a,b) - umin(a,b) is recognized as unsigned absdiff -> vabd.u8.
define <16 x i8> @umaxmin_v16i8(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %0, <16 x i8> %1)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}

; umax - umin, v8i16: folds to vabd.u16.
define <8 x i16> @umaxmin_v8i16(<8 x i16> %0, <8 x i16> %1) {
; CHECK-LABEL: umaxmin_v8i16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u16 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <8 x i16> @llvm.umax.v8i16(<8 x i16> %0, <8 x i16> %1)
  %b = tail call <8 x i16> @llvm.umin.v8i16(<8 x i16> %0, <8 x i16> %1)
  %sub = sub <8 x i16> %a, %b
  ret <8 x i16> %sub
}

; umax - umin, v4i32: folds to vabd.u32.
define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
; CHECK-LABEL: umaxmin_v4i32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u32 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <4 x i32> @llvm.umax.v4i32(<4 x i32> %0, <4 x i32> %1)
  %b = tail call <4 x i32> @llvm.umin.v4i32(<4 x i32> %0, <4 x i32> %1)
  %sub = sub <4 x i32> %a, %b
  ret <4 x i32> %sub
}

; umax - umin, v2i64: lowered with the vqsub trick, |a-b| = (a -sat b) | (b -sat a).
define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
; CHECK-LABEL: umaxmin_v2i64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vqsub.u64 q8, q1, q0
; CHECK-NEXT:    vqsub.u64 q9, q0, q1
; CHECK-NEXT:    vorr q0, q9, q8
; CHECK-NEXT:    bx lr
  %a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
  %b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)
  %sub = sub <2 x i64> %a, %b
  ret <2 x i64> %sub
}

; Commuted operands on the umin: the match is still recognized -> vabd.u8.
define <16 x i8> @umaxmin_v16i8_com1(<16 x i8> %0, <16 x i8> %1) {
; CHECK-LABEL: umaxmin_v16i8_com1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vabd.u8 q0, q0, q1
; CHECK-NEXT:    bx lr
  %a = tail call <16 x i8> @llvm.umax.v16i8(<16 x i8> %0, <16 x i8> %1)
  %b = tail call <16 x i8> @llvm.umin.v16i8(<16 x i8> %1, <16 x i8> %0)
  %sub = sub <16 x i8> %a, %b
  ret <16 x i8> %sub
}
